Repository: facebookincubator/dispenso Branch: main Commit: c787a8423663 Files: 268 Total size: 5.4 MB Directory structure: gitextract_x94z546h/ ├── .clang-format ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.yml │ │ ├── config.yml │ │ └── feature_request.yml │ ├── PULL_REQUEST_TEMPLATE.md │ └── workflows/ │ ├── build.yml │ ├── codeql.yml │ └── docs.yml ├── .gitignore ├── CHANGELOG.md ├── CMakeLists.txt ├── CMakePresets.json ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── benchmarks/ │ ├── CMakeLists.txt │ ├── benchmark_common.h │ ├── cascading_parallel_for_benchmark.cpp │ ├── concurrent_vector_benchmark.cpp │ ├── fast_math/ │ │ ├── CMakeLists.txt │ │ ├── avx512_benchmarks.cpp │ │ ├── avx_benchmarks.cpp │ │ ├── benchmark_helpers.h │ │ ├── benchmarks.cpp │ │ ├── erf_benchmarks.cpp │ │ ├── hwy_benchmarks.cpp │ │ ├── neon_benchmarks.cpp │ │ └── sse_benchmarks.cpp │ ├── for_each_benchmark.cpp │ ├── for_latency_benchmark.cpp │ ├── future_benchmark.cpp │ ├── graph_benchmark.cpp │ ├── graph_scene_benchmark.cpp │ ├── idle_pool_benchmark.cpp │ ├── locality_benchmark.cpp │ ├── nested_for_benchmark.cpp │ ├── nested_pool_benchmark.cpp │ ├── once_function_benchmark.cpp │ ├── pipeline_benchmark.cpp │ ├── pool_allocator_benchmark.cpp │ ├── run_benchmarks.py │ ├── rw_lock_benchmark.cpp │ ├── simple_for_benchmark.cpp │ ├── simple_pool_benchmark.cpp │ ├── small_buffer_benchmark.cpp │ ├── summing_for_benchmark.cpp │ ├── tbb_compat.h │ ├── thread_benchmark_common.h │ ├── timed_task_benchmark.cpp │ └── trivial_compute_benchmark.cpp ├── cmake/ │ └── DispensoConfig.cmake.in ├── codecov.yml ├── dispenso/ │ ├── CMakeLists.txt │ ├── async_request.h │ ├── completion_event.h │ ├── concurrent_object_arena.h │ ├── concurrent_vector.h │ ├── detail/ │ │ ├── can_invoke.h │ │ ├── completion_event_impl.h │ │ ├── concurrent_vector_impl.h │ │ ├── concurrent_vector_impl2.h │ │ ├── epoch_waiter.h │ │ ├── future_impl.h │ │ ├── future_impl2.h │ │ ├── graph_executor_impl.h │ │ ├── math.h │ │ ├── notifier_common.h │ │ ├── once_callable_impl.h │ │ ├── op_result.h │ │ ├── per_thread_info.cpp │ │ ├── per_thread_info.h │ │ ├── pipeline_impl.h │ │ ├── quanta.cpp │ │ ├── quanta.h │ │ ├── result_of.h │ │ ├── rw_lock_impl.h │ │ ├── small_buffer_allocator_impl.h │ │ ├── task_set_impl.h │ │ └── timed_task_impl.h │ ├── dispenso.h │ ├── fast_math/ │ │ ├── README.md │ │ ├── detail/ │ │ │ ├── double_promote.h │ │ │ └── fast_math_impl.h │ │ ├── fast_math.h │ │ ├── float_traits.h │ │ ├── float_traits_avx.h │ │ ├── float_traits_avx512.h │ │ ├── float_traits_hwy.h │ │ ├── float_traits_neon.h │ │ ├── float_traits_x86.h │ │ ├── simd.h │ │ └── util.h │ ├── for_each.h │ ├── future.h │ ├── graph.cpp │ ├── graph.h │ ├── graph_executor.cpp │ ├── graph_executor.h │ ├── latch.h │ ├── once_function.h │ ├── parallel_for.h │ ├── pipeline.h │ ├── platform.h │ ├── pool_allocator.cpp │ ├── pool_allocator.h │ ├── priority.cpp │ ├── priority.h │ ├── resource_pool.h │ ├── rw_lock.h │ ├── schedulable.h │ ├── small_buffer_allocator.cpp │ ├── small_buffer_allocator.h │ ├── small_vector.h │ ├── spsc_ring_buffer.h │ ├── task_set.cpp │ ├── task_set.h │ ├── third-party/ │ │ └── moodycamel/ │ │ ├── LICENSE.md │ │ ├── README.txt │ │ ├── blockingconcurrentqueue.h │ │ ├── concurrentqueue.h │ │ └── lightweightsemaphore.h │ ├── thread_id.cpp │ ├── thread_id.h │ ├── thread_pool.cpp │ ├── thread_pool.h │ ├── timed_task.cpp │ ├── timed_task.h │ ├── timing.cpp │ ├── timing.h │ ├── tsan_annotations.cpp │ ├── tsan_annotations.h │ ├── util.h │ └── utils/ 
│ └── graph_dot.h ├── docs/ │ ├── Doxyfile │ ├── benchmarks/ │ │ ├── benchmark_results.md │ │ ├── concurrent_vector_details.md │ │ ├── concurrent_vector_tcmalloc_details.md │ │ ├── for_latency_details.md │ │ ├── future_details.md │ │ ├── graph_details.md │ │ ├── graph_scene_details.md │ │ ├── idle_pool_details.md │ │ ├── index.html │ │ ├── nested_for_details.md │ │ ├── nested_pool_details.md │ │ ├── once_function_details.md │ │ ├── pipeline_details.md │ │ ├── pool_allocator_details.md │ │ ├── rw_lock_details.md │ │ ├── simple_for_details.md │ │ ├── simple_pool_details.md │ │ ├── small_buffer_details.md │ │ ├── summing_for_details.md │ │ ├── timed_task_details.md │ │ └── trivial_compute_details.md │ ├── building.md │ ├── custom.css │ ├── design/ │ │ ├── barrier_dispatch.md │ │ ├── coroutines.md │ │ ├── cpp20_concepts.md │ │ ├── fast_math_roadmap.md │ │ ├── parallel_algorithms.md │ │ ├── release_checklist.md │ │ └── roadmap.md │ ├── getting_started.md │ ├── groups.dox │ ├── header.html │ ├── mainpage.md │ ├── migrating_from_openmp.md │ ├── migrating_from_tbb.md │ └── third-party/ │ └── doxygen-awesome/ │ ├── doxygen-awesome-darkmode-toggle.js │ └── doxygen-awesome.css ├── examples/ │ ├── CMakeLists.txt │ ├── concurrent_vector_example.cpp │ ├── for_each_example.cpp │ ├── future_example.cpp │ ├── graph_example.cpp │ ├── latch_example.cpp │ ├── parallel_for_example.cpp │ ├── pipeline_example.cpp │ ├── resource_pool_example.cpp │ └── task_set_example.cpp ├── results/ │ ├── android_arm64.json │ ├── linux_x64.json │ ├── macos_arm64.json │ └── windows_x64.json ├── run_bench.bat ├── scripts/ │ ├── BENCHMARKING.md │ ├── compare_benchmarks.py │ ├── generate_charts.py │ ├── generate_plotly_benchmarks.py │ ├── run_benchmarks.py │ ├── update_benchmarks.py │ └── update_package_managers.py └── tests/ ├── CMakeLists.txt ├── async_request_test.cpp ├── chunked_for_test.cpp ├── completion_event_test.cpp ├── concurrent_object_arena_test.cpp ├── concurrent_vector_a_test.cpp ├── concurrent_vector_b_test.cpp ├── concurrent_vector_default_test.cpp ├── concurrent_vector_nocache_test.cpp ├── concurrent_vector_test_common.h ├── concurrent_vector_test_common_types.h ├── fast_math/ │ ├── CMakeLists.txt │ ├── acos_test.cpp │ ├── asin_test.cpp │ ├── atan2_test.cpp │ ├── atan_test.cpp │ ├── avx512_test.cpp │ ├── avx_test.cpp │ ├── bivariate_ulp_eval.h │ ├── cbrt_test.cpp │ ├── cos_test.cpp │ ├── erf_test.cpp │ ├── eval.cpp │ ├── eval.h │ ├── exp10_test.cpp │ ├── exp2_test.cpp │ ├── exp_test.cpp │ ├── expm1_test.cpp │ ├── frexp_test.cpp │ ├── hwy_test.cpp │ ├── hypot_test.cpp │ ├── ldexp_test.cpp │ ├── log10_test.cpp │ ├── log1p_test.cpp │ ├── log2_test.cpp │ ├── log_test.cpp │ ├── neon_test.cpp │ ├── pow_test.cpp │ ├── pow_ulp_eval.cpp │ ├── simd_test_utils.h │ ├── sin_test.cpp │ ├── sincos_test.cpp │ ├── sinpi_test.cpp │ ├── sse_test.cpp │ ├── tan_test.cpp │ ├── tanh_test.cpp │ ├── test_main.cpp │ ├── ulp_eval.cpp │ └── util_test.cpp ├── for_each_test.cpp ├── forward_shared_pool.cpp ├── future_test.cpp ├── graph_test.cpp ├── greedy_for_ranges_test.cpp ├── greedy_for_test.cpp ├── latch_test.cpp ├── once_function_test.cpp ├── pipeline_test.cpp ├── pool_allocator_test.cpp ├── priority_test.cpp ├── resource_pool_test.cpp ├── rw_lock_test.cpp ├── shared_pool_test.cpp ├── small_buffer_allocator_test.cpp ├── small_vector_test.cpp ├── spsc_ring_buffer_test.cpp ├── task_set_test.cpp ├── test_tid.h ├── thread_id_test.cpp ├── thread_pool_test.cpp ├── timed_task_test.cpp ├── timing_test.cpp └── util_test.cpp 
================================================ FILE CONTENTS ================================================ ================================================ FILE: .clang-format ================================================ --- AccessModifierOffset: -1 AlignAfterOpenBracket: AlwaysBreak AlignConsecutiveAssignments: false AlignConsecutiveDeclarations: false AlignEscapedNewlinesLeft: true AlignOperands: false AlignTrailingComments: false AllowAllParametersOfDeclarationOnNextLine: false AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false AllowShortFunctionsOnASingleLine: Empty AllowShortIfStatementsOnASingleLine: false AllowShortLoopsOnASingleLine: false AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: true AlwaysBreakTemplateDeclarations: true BinPackArguments: false BinPackParameters: false BraceWrapping: AfterClass: false AfterControlStatement: false AfterEnum: false AfterFunction: false AfterNamespace: false AfterObjCDeclaration: false AfterStruct: false AfterUnion: false BeforeCatch: false BeforeElse: false IndentBraces: false BreakBeforeBinaryOperators: None BreakBeforeBraces: Attach BreakBeforeTernaryOperators: true BreakConstructorInitializersBeforeComma: false BreakAfterJavaFieldAnnotations: false BreakStringLiterals: false ColumnLimit: 100 CommentPragmas: '^ IWYU pragma:' ConstructorInitializerAllOnOneLineOrOnePerLine: true ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true DerivePointerAlignment: false DisableFormat: false ForEachMacros: [ FOR_EACH_RANGE, FOR_EACH, ] IncludeCategories: - Regex: '^<.*\.h(pp)?>' Priority: 1 - Regex: '^<.*' Priority: 2 - Regex: '.*' Priority: 3 IndentCaseLabels: true IndentWidth: 2 IndentWrappedFunctionNames: false KeepEmptyLinesAtTheStartOfBlocks: false MacroBlockBegin: '' MacroBlockEnd: '' MaxEmptyLinesToKeep: 1 NamespaceIndentation: None ObjCBlockIndentWidth: 2 ObjCSpaceAfterProperty: false ObjCSpaceBeforeProtocolList: false PenaltyBreakBeforeFirstCallParameter: 1 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 200 PointerAlignment: Left RawStringFormats: - Language: TextProto Delimiters: - pb ReflowComments: true SortIncludes: true SpaceAfterCStyleCast: false SpaceBeforeAssignmentOperators: true SpaceBeforeParens: ControlStatements SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 SpacesInAngles: false SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false Standard: Cpp11 TabWidth: 4 UseTab: Never ... ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.yml ================================================ name: Bug Report description: File a bug report title: "[Bug]: " labels: ["bug", "triage"] assignees: - graphicsMan body: - type: markdown attributes: value: | Thanks for taking the time to fill out this bug report! - type: input id: contact attributes: label: Contact Details description: How can we get in touch with you if we need more info? placeholder: ex. email@example.com validations: required: false - type: textarea id: what-happened attributes: label: What happened? description: Also tell us, what did you expect to happen? placeholder: Tell us what you see! value: "A bug happened!" validations: required: true - type: dropdown id: version attributes: label: Version description: What version of our software are you running? 
options: - 1.0 (Default) - latest (Edge) validations: required: true - type: checkboxes id: terms attributes: label: Code of Conduct description: By submitting this issue, you agree to follow our [Code of Conduct](https://example.com) options: - label: I agree to follow this project's Code of Conduct required: true ================================================ FILE: .github/ISSUE_TEMPLATE/config.yml ================================================ blank_issues_enabled: false ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.yml ================================================ name: Feature Request description: File a feature request title: "[Feature Request]: " labels: ["feature", "request"] assignees: - graphicsMan body: - type: markdown attributes: value: | Thanks for taking the time to fill out this feature request! - type: input id: contact attributes: label: Contact Details description: How can we get in touch with you if we need more info? placeholder: ex. email@example.com validations: required: false - type: textarea id: whats-wanted attributes: label: What is the desired feature? description: Give some details value: "Details here" validations: required: true - type: checkboxes id: terms attributes: label: Code of Conduct description: By submitting this issue, you agree to follow our [Code of Conduct](https://example.com) options: - label: I agree to follow this project's Code of Conduct required: true ================================================ FILE: .github/PULL_REQUEST_TEMPLATE.md ================================================ # PR Details ## Description ## Related Issue ## Motivation and Context ## Test Plan ## Types of changes - [ ] Docs change - [ ] Refactoring - [ ] Dependency upgrade - [ ] Bug fix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) ## Checklist - [ ] My code follows the code style of this project. - [ ] I have run clang-format. - [ ] My change requires a change to the documentation. - [ ] I have updated the documentation accordingly. - [ ] I have read the **CONTRIBUTING** document. - [ ] I have added tests to cover my changes. - [ ] All new and existing tests passed, including in ASAN and TSAN modes (if available on your platform). 
================================================ FILE: .github/workflows/build.yml ================================================ name: Build and test on: push: branches: [main] pull_request: branches: [main] permissions: contents: read env: CTEST_OUTPUT_ON_FAILURE: 1 jobs: ############################################################################# # Core platform builds - strategic sampling of OS/compiler/standard/library ############################################################################# build-matrix: name: ${{ matrix.name }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: include: # Linux - GCC baseline - name: Linux GCC C++14 os: ubuntu-latest cmake_args: -DCMAKE_CXX_STANDARD=14 # Linux - Clang with C++20 concepts - name: Linux Clang C++20 os: ubuntu-latest cc: clang cxx: clang++ cmake_args: -DCMAKE_CXX_STANDARD=20 # macOS - C++20 with static library - name: macOS C++20 static os: macos-latest cmake_args: -DCMAKE_CXX_STANDARD=20 -DDISPENSO_SHARED_LIB=OFF # Windows - MSVC baseline - name: Windows MSVC C++14 os: windows-latest cmake_args: -DCMAKE_CXX_STANDARD=14 # Windows - MSVC C++20 with static library - name: Windows MSVC C++20 static os: windows-latest cmake_args: -DCMAKE_CXX_STANDARD=20 -DDISPENSO_SHARED_LIB=OFF steps: - uses: actions/checkout@v4 - name: Configure run: | mkdir build && cd build cmake .. -DDISPENSO_BUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Release ${{ matrix.cmake_args }} env: CC: ${{ matrix.cc }} CXX: ${{ matrix.cxx }} - name: Build working-directory: ./build run: cmake --build . --parallel 4 --config Release - name: Test working-directory: ./build run: ctest -LE flaky --build-config Release ############################################################################# # Architecture-specific builds ############################################################################# build-linux-x86: name: Linux x86 (32-bit) runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Install 32-bit support run: sudo apt-get update && sudo apt-get install -y gcc-multilib g++-multilib - name: Configure run: | mkdir build && cd build cmake .. -DDISPENSO_BUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_FLAGS="-m32" -DCMAKE_C_FLAGS="-m32" - name: Build working-directory: ./build run: cmake --build . --parallel 4 - name: Test working-directory: ./build run: ctest -LE flaky build-macos-gcc: name: macOS GCC C++14 (ARM64) runs-on: macos-latest steps: - uses: actions/checkout@v4 - name: Set up GCC run: | brew list gcc || brew install gcc GCC_PREFIX=$(brew --prefix gcc) GCC_VER=$(ls "$GCC_PREFIX/bin"/gcc-* | grep -oE '[0-9]+$' | sort -n | tail -1) echo "CC=$GCC_PREFIX/bin/gcc-$GCC_VER" >> $GITHUB_ENV echo "CXX=$GCC_PREFIX/bin/g++-$GCC_VER" >> $GITHUB_ENV - name: Configure run: | mkdir build && cd build cmake .. -DDISPENSO_BUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_STANDARD=14 - name: Build working-directory: ./build run: cmake --build . --parallel 4 - name: Test working-directory: ./build run: ctest -LE flaky build-windows-x86: name: Windows x86 (32-bit) runs-on: windows-latest steps: - uses: actions/checkout@v4 - name: Configure run: | mkdir build && cd build cmake .. -A Win32 -DDISPENSO_BUILD_TESTS=ON - name: Build working-directory: ./build run: cmake --build . 
--parallel 4 --config Release - name: Test working-directory: ./build run: ctest -LE flaky --build-config Release ############################################################################# # Sanitizer builds - critical for concurrency library ############################################################################# sanitizers: name: ${{ matrix.name }} runs-on: ubuntu-latest strategy: fail-fast: false matrix: include: # TSan requires clang - GCC 13's TSan has spurious warnings about atomic_thread_fence - name: Thread Sanitizer cmake_args: -DTHREAD_SANITIZER=ON cc: clang cxx: clang++ - name: Address Sanitizer cmake_args: -DADDRESS_SANITIZER=ON steps: - uses: actions/checkout@v4 - name: Configure run: | mkdir build && cd build cmake .. -DDISPENSO_BUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Debug ${{ matrix.cmake_args }} env: CC: ${{ matrix.cc }} CXX: ${{ matrix.cxx }} - name: Build working-directory: ./build run: cmake --build . --parallel 4 - name: Test working-directory: ./build run: ctest -LE flaky ############################################################################# # Documentation build ############################################################################# docs: name: Documentation runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Install Doxygen run: sudo apt-get update && sudo apt-get install -y doxygen graphviz - name: Build documentation working-directory: ./docs run: doxygen Doxyfile - name: Check for warnings working-directory: ./docs run: | if [ -s doxygen_warnings.log ]; then echo "Doxygen warnings found:" cat doxygen_warnings.log exit 1 fi echo "No Doxygen warnings." ############################################################################# # Code coverage ############################################################################# coverage: name: Code Coverage runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Install lcov run: sudo apt-get update && sudo apt-get install -y lcov - name: Configure with coverage run: | mkdir build && cd build cmake .. -DDISPENSO_BUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_CXX_FLAGS="--coverage -fprofile-arcs -ftest-coverage -fprofile-update=atomic" \ -DCMAKE_C_FLAGS="--coverage -fprofile-arcs -ftest-coverage -fprofile-update=atomic" \ -DCMAKE_EXE_LINKER_FLAGS="--coverage" - name: Build working-directory: ./build run: cmake --build . --parallel 4 - name: Test (stable tests) working-directory: ./build run: ctest -LE flaky - name: Test (TimedTask, with retries) working-directory: ./build run: ctest -L flaky -R "^TimedTaskTest\." 
--repeat until-pass:5 - name: Generate coverage report run: | lcov --capture --directory build --output-file coverage.info --ignore-errors mismatch,gcov,negative lcov --remove coverage.info '/usr/*' '*/build/_deps/*' '*/tests/*' '*/third-party/*' '*/benchmarks/*' '*/examples/*' '*/bits/*' '*/ext/*' --output-file coverage.info --ignore-errors unused lcov --list coverage.info - name: Upload to Codecov uses: codecov/codecov-action@v5 with: files: coverage.info token: ${{ secrets.CODECOV_TOKEN }} fail_ci_if_error: false verbose: true ================================================ FILE: .github/workflows/codeql.yml ================================================ name: CodeQL on: push: branches: [main] pull_request: branches: [main] schedule: - cron: '17 9 * * 1' jobs: analyze: name: Analyze (C/C++) runs-on: ubuntu-latest permissions: security-events: write contents: read steps: - uses: actions/checkout@v4 - name: Initialize CodeQL uses: github/codeql-action/init@v4 with: languages: c-cpp - name: Build run: | cmake -S . -B build \ -DDISPENSO_BUILD_TESTS=OFF \ -DDISPENSO_BUILD_BENCHMARKS=OFF \ -DCMAKE_BUILD_TYPE=Release cmake --build build --parallel 4 - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v4 ================================================ FILE: .github/workflows/docs.yml ================================================ name: Docs on: push: branches: [ main ] permissions: contents: write jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Set version from CHANGELOG run: | VERSION=$(grep -m1 '^[0-9]' CHANGELOG.md | sed 's/ .*//') sed -i "s/^PROJECT_NUMBER.*/PROJECT_NUMBER = $VERSION/" docs/Doxyfile - name: Doxygen Action uses: mattnotmitt/doxygen-action@v1 with: working-directory: "docs/" doxyfile-path: "./Doxyfile" - name: Fix Doxygen output permissions and copy benchmark dashboard run: | sudo chown -R $(id -u):$(id -g) docs/doxygen cp -r docs/benchmarks docs/doxygen/html/benchmarks - name: Deploy uses: peaceiris/actions-gh-pages@v4 with: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: ./docs/doxygen/html ================================================ FILE: .gitignore ================================================ # Compiled source # ################### *.com *.class *.dll *.exe *.o *.so *.a bin lib # Packages # ############ # it's better to unpack these files and commit the raw source # git has its own built in compression methods *.7z *.dmg *.gz *.iso *.jar *.rar *.tar *.zip /.project # generated cmake files # ######################### *CMakeCache.txt *.log *.make *.cmake CMakeFiles Makefile *Dir /build/* docs/doxygen/ # Clang # ######### .cache/ ================================================ FILE: CHANGELOG.md ================================================ 1.5.1 (March 28, 2026) ### Bug fixes * Fixed `__ulock_wait`/`__ulock_wake` usage on macOS versions prior to 10.12 and on PowerPC where these APIs are unavailable. The ulock path is now guarded behind a runtime version check with `pthread_cond` fallback. * Fixed ARM64 Windows build failure: `notifier_common.h` incorrectly defined `_ARM_` (32-bit ARM) instead of `_ARM64_` on ARM64 Windows, causing `winnt.h` to reference missing 32-bit ARM intrinsics. * Fixed `platform.h` version macros not being updated for 1.5.0 release (were stuck at 1.4.1) * Removed vestigial `CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS` from CMakeLists.txt. All public APIs now use proper `DISPENSO_DLL_ACCESS` annotations; the blanket export is no longer needed and is prohibited by vcpkg's maintainer guide. 
### Build system * Added `DISPENSO_USE_SYSTEM_CONCURRENTQUEUE` CMake option to use system-installed `moodycamel::concurrentqueue` instead of bundled copy (default OFF), for vcpkg compatibility * Export C++ standard requirement via `target_compile_features` so downstream consumers compile with at least the same standard dispenso was built with * Respect `BUILD_SHARED_LIBS` for `DISPENSO_SHARED_LIB` default, allowing vcpkg to control static/shared linkage ### Infrastructure * Added package manager release automation script (`scripts/update_package_managers.py`) with post-write checksum verification, platform-aware testing, and PR body templates following each repo's CONTRIBUTING.md * Added CodeQL security analysis workflow scoped to main branch * Added package manager badges (vcpkg, Conan, Homebrew, MacPorts) to README * Added release checklist documentation 1.5.0 (March 22, 2026) ### New features * Added `SmallVector` container with configurable inline storage, reducing heap allocations for small collections * Added `SPSCRingBuffer` lock-free single-producer single-consumer ring buffer with power-of-two optimization * Added `scheduleBulk(count, generator)` API to ThreadPool, TaskSet, and ConcurrentTaskSet for efficient bulk task submission with reduced atomic contention * Added random-access iterator specialization for `for_each_n`, with iterator category dispatch for optimal chunk boundary computation * Added Mac futex-based wakeup using `os_sync_wait_on_address` (macOS 14.4+) with `__ulock_wait` fallback * Added C++20 concept constraints for better error messages when template requirements aren't met * Added experimental `fast_math` sublibrary with SIMD-accelerated math functions including `log2`, `exp2`, `exp`, `exp10`, `cbrt`, `sin`, `cos`, `sincos`, `asin`, and `atan2` with configurable accuracy/performance trade-offs and multiple SIMD backends (SSE4.1, AVX2, AVX512, NEON, Highway). 
**API unstable** — gated by `DISPENSO_BUILD_FAST_MATH` CMake option * Added benchmark runner and chart generation scripts with multi-platform support * Added interactive Plotly.js benchmark dashboard generator ### Performance improvements * ThreadPool atomics simplification: replaced 3 per-task tracking atomics with `numSleeping_` + batched `workRemaining_` decrements, reducing per-task atomic operations from 5-6 to ~1 (+24% geometric mean across 568 benchmark tests) * ThreadPool wakeup heuristic: reduced futex calls from ~1M to ~11K by only waking when capacity cannot cover queued work; `mostly_idle` benchmark 2.6x faster * Cache-line alignment for `poolLoadFactor_` and `numThreads_` to reduce false sharing (L1 cache miss rate 16.33% → 6.92% on `schedule()` hot path) * Graph executor optimizations: `SmallVector` for node dependents, pre-reserve capacity, inline continuation (build_big_tree 1.95x faster, build_dep_chain 2.14x faster) * Serial pipeline SPSC optimization: dedicated executor with ring buffers for fully-serial pipelines (~33% faster) * Inline continuation for serial pipeline stages (scheduling overhead reduced ~3x) * Bulk wakeup with threshold-based `wakeN()`/`wakeAll()` selection for efficient bulk scheduling * `for_each_n` converted to `scheduleBulk`: 2.1x faster at 32 threads, 1.6x at 64 threads for 100M elements * `parallel_for` kAuto bulk scheduling: trivial_compute 52ms → 19ms at 192 threads (matching TBB) * ConcurrentVector: non-atomic buffer pointer cache for read-hot paths (disabled on ARM), inline asm `bsr` for `detail::log2` on x86, and platform-adaptive `bucketAndSubIndexForIndex` fast path * `OnceFunction` devirtualized: replaced vtable-based dispatch with direct function pointer, eliminating indirect call overhead * `TaskSet`/`ConcurrentTaskSet` noWait path: replaced `shared_ptr` with pool-allocated single-atomic chunk index, reducing allocation overhead ### Infrastructure * Benchmark runner (`run_benchmarks.py`) with JSON output and machine info collection * Chart generator (`generate_charts.py`) with specialized visualizations per benchmark suite * Multi-platform benchmark composition (`update_benchmarks.py --compose`) for unified documentation * Prefer system-installed GoogleTest, Taskflow, and TBB in CMake with FetchContent fallback * Added oneTBB compatibility via `tbb_compat.h` wrapper for `task_scheduler_init` * Added BUCK targets for `idle_pool_benchmark`, `nested_pool_benchmark`, `for_each_benchmark`, and `locality_benchmark` ### Documentation * Added examples directory with compilable example programs for each feature * Added Getting Started guide (`docs/getting_started.md`) with inline code snippets from examples * Added OpenMP migration guide (`docs/migrating_from_openmp.md`) * Improved README clarity, discoverability, and feature descriptions ### CI and build improvements * Comprehensive CI matrix: 11 jobs covering 3 architectures (x64, x86, ARM64), 3 OSes, 3 compilers (GCC, Clang, MSVC), C++14/20, TSan/ASan, code coverage, and Doxygen builds * Added codecov.yml for enforcing 92% code coverage threshold ### Bug fixes * Fixed ABI mismatch between exception and no-exception builds: `TaskSetBase` and `FutureImplResultMember` had conditionally compiled members that shifted struct layout depending on `-fno-exceptions`, causing crashes when translation units disagreed. Exception-related data members are now always present in the layout (with zero runtime cost when exceptions are disabled). 
**Note:** this changes the ABI for builds that previously used `-fno-exceptions`; recompile all code against 1.5 headers if mixing exception modes. * Fixed `ConcurrentTaskSet` parent stack overflow when tasks recursively schedule to the same task set: self-recursive inlining via `tryExecuteNext()` repeatedly pushed the same `TaskSetBase*` onto the thread-local parent stack (depth limit 64), causing an abort under heavy inlining. Fix skips redundant push/pop when the TaskSet is already the current parent. * Fixed pipeline `kLimited` scheduler `wait()` losing late-arriving items: the LIMITED path only drained the local queue without waiting for in-flight items, so items enqueued by a previous stage's CTS task after the drain could be permanently orphaned. Fix replaces the drain-only loop with an `outstanding_`-based spin that ensures all items complete, with `tryExecuteNext()` to keep the calling thread productive. * Fixed `parallel_for` with `kAuto` chunking incorrectly falling back to static chunking when `maxThreads` was left at default * Fixed `NoOpIter` missing iterator trait typedefs for C++20 compliance * Fixed `NoOpIter::operator*()` / `operator[]` static local data race * Fixed SmallBufferAllocator unsigned underflow where `allocSmallBuffer<1/2/3>()` returned nullptr instead of a 4-byte block * Fixed `cpuRelax()` being a no-op on MSVC (missing `_mm_pause()` / `__yield()` intrinsics) * Fixed x86 Windows build issues * Fixed Doxygen documentation warnings * Fixed pipeline exception safety: exceptions thrown in pipeline stage functors are now caught and propagated to the caller via `ConcurrentTaskSet`. Added RAII guards for stage resource cleanup, `OnceFunction::cleanupNotRun()` for proper deallocation of unexecuted tasks, and a deadlock fix in the `kLimited` scheduler's resource spin loop when exceptions leave no threads to release resources. * Fixed MSVC lambda capture for constexpr variable * Fixed `idle_pool_benchmark` fairness (loop bound, static scheduling, and pool placement) * Fixed `nested_for_benchmark` incorrect loop bound, static scheduling, and pool placement ### Test improvements * Added comprehensive tests for thread_pool spin-poll with sleep mode * Added comprehensive task_set edge case tests * Added comprehensive tests for concurrent_object_arena * Added edge case tests for pool_allocator * Added timing tests for getTime() function * Added tests for Graph/Subgraph accessors and BiPropNode edge cases * Added `SmallVector` test suite (43 tests) * Added `SPSCRingBuffer` test suite (47 tests) * Improved overall test coverage from ~89% to 96.3% (dispenso source only, excluding stdlib and third-party) 1.4.1 (January 5, 2026) ### Bug fixes and build improvements * Fixed clock frequency calculation for mac-arm platforms * Addressed potential race condition at TimedTaskScheduler construction * Adjusted build platforms for better compatibility 1.4 (January 2, 2025) ### Efficiency improvements, bug and warning fixes * Added some benchmarks and comparison with TaskFlow (thanks andre-nguyen!) * Fixed compilation when compiling with DISPENSO_DEBUG (thanks EscapeZero!) * Improved efficiency on Linux for infrequent thread pool usage. Reduces polling overhead by 10x by switching to event-based wakeup instead of spin polling. * Fix C++20 compilation issues (thanks aavbsouza!) * Fix several build warnings (thanks SeaOtocinclus!) * Add conda package badge, disable gtest install (thanks JeongSeok Lee!) 
* Solved rare post-main shutdown issues with NewThreadInvoker * Fixed test issues for 32-bit builds * Fixed broken test logic for test thread IDs * Fixed various build warnings 1.3 (April 25, 2024) ### Bug fixes, portability enhancements, and small functionality enhancements * Fixed several generic warnings (thanks michel-slm!) * cpuRelax added for PowerPC and ARM (thanks barracuda156!) * Added missing header (thanks ryandesign!) * Try to detect and add libatomic when required (thanks for discussions barracuda156!) * Enable small buffers from small buffer allocators to go down to 4 bytes (thanks for discussion David Caruso!). This is handy for 32-bit builds where pointers are typically 4 bytes * Ensure that NOMINMAX is propagated for CMake Windows builds (thanks SeaOtocinclus!) * Fix some cases using std::make_shared for types requiring large alignment, which is a bug prior to C++17 (thanks for help finding these SeaOtocinclus!) * Set up CI on GitHub Actions, including builds for Mac and Windows in addition to Linux (thanks SeaOtocinclus!) * Add an environment variable `DISPENSO_MAX_THREADS_PER_POOL` to limit max number of threads available to any thread pool. In the spirit of `OMP_NUM_THREADS`. (thanks Yong-Chull Jang!) * Slight change of behavior w.r.t. use of `maxThreads` option in `ForEachOptions` and `ParForOptions` to limit concurrency the same way in both blocking and non-blocking `for_each` and `parallel_for` (thanks Arnie Yuan!) * Various fixes to enable CMake builds on various 32-bit platforms (thanks for discussions barracuda156!) * Updates to README Known Issues: * Large subset of dispenso tests are known to fail on 32-bit PPC Mac. If you have access to such a machine and are willing to help debug, it would be appreciated! * NewThreadInvoker can have a program shutdown race on Windows platforms if the threads launched by it are not finished running by end of main() 1.2 (December 27, 2023) ### Bug fixes and functionality enhancements * Several small bug fixes, especially around 32-bit builds and at-exit shutdown corner cases, and TSAN finding benign races and/or causing timeout due to pathological lock-free behaviors in newer versions of TSAN * Improve accuracy of `dispenso::getTime` * Add C++20-like `Latch` functionality * Add mechanism for portable thread priorities * Add a timed task/periodically scheduled task feature. Average and standard deviation of the accuracy of `dispenso::TimedTaskScheduler` are both much better than `folly::FunctionScheduler` (from 2x to 10x+ depending on settings and platform) * Enhancements to `parallel_for` * Add an option to automatically reduce the number of threads working on a range if the work is too cheap to justify parallelization. This can result in 3000x+ speedups for very lightweight loops * Reuse per-thread state containers across parallel for calls (these must block in-between, or be thread-safe types) * `parallel_for` functors may now be called with an input range directly instead of requiring a ChunkedRange. This is as simple as providing a functor/lambda that takes the additional argument, just as was previously done with `ChunkedRange`. `ChunkedRange`s still work, and this is fully backward compatible * `ThreadPool`s have a new option for full spin polling.
This is generally best avoided, and I'd argue never to use this for the default Global thread pool, but can be useful for a subset of threads in systems that require real-time responsivity (especially, can be combined with the thread priority feature also found in this release) * Task graph execution (thanks Roman Fedotov!). Building and running dispenso task graphs is typically 25% faster than the (already excellent) `TaskFlow` library in our benchmarks. Additionally, we have a partial update feature that can enable much faster (e.g. 50x faster) execution in cases where only a small percentage of task inputs are updated (think of per-frame partial scene updates in a game) 1.1 (October 1, 2022) ### Performance and functionality enhancements * CMake changes to allow install of targets and CMake dispenso target exports (thanks jeffamstutz!) * Addition of typical container type definitions for ConcurrentVector (thanks Michael Jung!) * Large performance improvements for Futures and CompletionEvents on MacOs. Resulted in order-of-magnitude speedups for those use cases on MacOs. * Addition of new benchmark for performance with infrequent use of `parallel_for`, `for_latency_benchmark` * Fixes to ensure `parallel_for` works with thread pools with zero threads (thanks kevinbchen!). Further work has been done to ensure that thread pools with zero threads simply always run code inline. * By default, the global thread pool uses one fewer thread than the machine has hardware threads. This behavior was introduced because dispenso very often runs on the calling thread as well as pool threads, and so one fewer thread in the pool can lead to better performance. * Update googletest version to 1.12.1 (thanks porumbes!) * Add a utility in dispenso to get a thread ID, `threadId`. These 64-bit IDs are unique per thread, and will not be recycled. These values grow from zero, ensuring the caller can assume they are small if the number of threads is also small (e.g. you won't have an ID of `0xdeadbeef` if you only run hundreds or thousands of threads in the lifetime of the process). * Add a utility, `getTime`, to get time quickly. This provides the double-precision time in seconds since the first call to `getTime` after process start. * Use a new scheduling mechanism in the thread pool on Windows. This resulted in up to a 13x improvement in latency between putting items in the pool and having those items run. This scheduling is optional, but turned off for Linux and MacOs since scheduling was already fast on those platforms. * Optimizations to enable faster scheduling in thread pools. This resulted in a range of 5% to 45% speedup across multiple benchmarks including `future_benchmark` and `pipeline_benchmark`. * Fixed a performance bug in work stealing logic; now dispenso outperforms TBB in the `pipeline_benchmark` * Added a task set cancellation feature, with a relatively simple mechanism for submitted work to check if its owning task set has been cancelled. When creating a task set, you can optionally opt into parent cancellation propagation as well. While this propagation is fairly efficient, it did create a noticeable impact on performance in some cases, and thus it was decided to allow this behavior, but not penalize performance for those who don't need the behavior. 1.0 (November 24, 2021) ### dispenso initial release ================================================ FILE: CMakeLists.txt ================================================ # Copyright (c) Meta Platforms, Inc. and affiliates.
# # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. cmake_minimum_required(VERSION 3.12) # Use /Z7 (embedded debug info) instead of /Zi (PDB server) on MSVC+Ninja. # /Zi requires mspdbsrv.exe which can fail with C1902 "Program database manager # mismatch" when Ninja launches cl.exe as a subprocess. if(POLICY CMP0141) cmake_policy(SET CMP0141 NEW) set(CMAKE_MSVC_DEBUG_INFORMATION_FORMAT "$<$:Embedded>") endif() # CMake 4.x enables C++20 module scanning by default with Ninja, which breaks # try_compile() checks. Dispenso doesn't use C++20 modules. if(POLICY CMP0155) cmake_policy(SET CMP0155 OLD) endif() set(CMAKE_CXX_SCAN_FOR_MODULES OFF) project( Dispenso VERSION 1.5.1 DESCRIPTION "Dispenso is a library for working with sets of parallel tasks" LANGUAGES CXX) if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") set(DISPENSO_STANDALONE TRUE) else() set(DISPENSO_STANDALONE FALSE) endif() if (DISPENSO_STANDALONE) include(GNUInstallDirs) endif() list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/modules) # Main project setup if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) # Default to BUILD_SHARED_LIBS when set (e.g. by vcpkg), otherwise ON. if(DEFINED BUILD_SHARED_LIBS) option(DISPENSO_SHARED_LIB "Build Dispenso shared library" ${BUILD_SHARED_LIBS}) else() option(DISPENSO_SHARED_LIB "Build Dispenso shared library" ON) endif() endif() option(ADDRESS_SANITIZER "Use Address Sanitizer, incompatible with THREAD_SANITIZER" OFF) option(THREAD_SANITIZER "Use Thread Sanitizer, incompatible with ADDRESS_SANITIZER" OFF) if (ADDRESS_SANITIZER) add_compile_options(-fsanitize=address -fsanitize=undefined) add_link_options(-fsanitize=address -fsanitize=undefined) elseif (THREAD_SANITIZER) add_compile_options(-fsanitize=thread) add_link_options(-fsanitize=thread) endif() set(CMAKE_CXX_STANDARD 14 CACHE STRING "the C++ standard to use for this project") set(DISPENSO_USE_SYSTEM_CONCURRENTQUEUE OFF CACHE BOOL "Use system-installed moodycamel::concurrentqueue instead of bundled copy") ########################################################### # Targets add_subdirectory(dispenso) set(DISPENSO_BUILD_TESTS OFF CACHE BOOL "Should tests be built?") set(DISPENSO_BUILD_BENCHMARKS OFF CACHE BOOL "Should benchmarks be built?") set(DISPENSO_BUILD_EXAMPLES OFF CACHE BOOL "Should examples be built?") set(DISPENSO_BUILD_FAST_MATH OFF CACHE BOOL "Build experimental fast_math sublibrary (API unstable)") set(DISPENSO_FAST_MATH_SIMD "none" CACHE STRING "SIMD ISA for fast_math targets: none, native, sse4.1, avx2, avx512, neon") set(DISPENSO_FAST_MATH_HIGHWAY OFF CACHE BOOL "Enable Highway SIMD backend for fast_math (fetches Highway if not found)") if(DISPENSO_BUILD_FAST_MATH) # Set compiler flags for the chosen SIMD backend. # Each level is cumulative: avx2 implies sse4.1, avx512 implies avx2+sse4.1. # "native" auto-detects all ISA extensions supported by the build machine. set(DISPENSO_FAST_MATH_SIMD_FLAGS "") if(DISPENSO_FAST_MATH_SIMD STREQUAL "native") if(MSVC) # MSVC doesn't have -march=native; AVX2 is the best portable flag. set(DISPENSO_FAST_MATH_SIMD_FLAGS /arch:AVX2) else() set(DISPENSO_FAST_MATH_SIMD_FLAGS -march=native) endif() elseif(DISPENSO_FAST_MATH_SIMD STREQUAL "sse4.1") if(NOT MSVC) # MSVC enables SSE4.1 by default on x64. 
set(DISPENSO_FAST_MATH_SIMD_FLAGS -msse4.1) endif() elseif(DISPENSO_FAST_MATH_SIMD STREQUAL "avx2") if(MSVC) set(DISPENSO_FAST_MATH_SIMD_FLAGS /arch:AVX2) else() set(DISPENSO_FAST_MATH_SIMD_FLAGS -msse4.1 -mavx2 -mfma) endif() elseif(DISPENSO_FAST_MATH_SIMD STREQUAL "avx512") if(MSVC) set(DISPENSO_FAST_MATH_SIMD_FLAGS /arch:AVX512) else() set(DISPENSO_FAST_MATH_SIMD_FLAGS -msse4.1 -mavx2 -mfma -mavx512f -mavx512bw -mavx512dq) endif() elseif(DISPENSO_FAST_MATH_SIMD STREQUAL "neon") # NEON is implicit on aarch64; no extra flags needed. elseif(NOT DISPENSO_FAST_MATH_SIMD STREQUAL "none") message(FATAL_ERROR "Unknown DISPENSO_FAST_MATH_SIMD value: ${DISPENSO_FAST_MATH_SIMD}. " "Valid options: none, native, sse4.1, avx2, avx512, neon") endif() # Find or fetch Highway if requested. if(DISPENSO_FAST_MATH_HIGHWAY) find_package(hwy QUIET) if(NOT hwy_FOUND) include(FetchContent) FetchContent_Declare( highway GIT_REPOSITORY https://github.com/google/highway.git GIT_TAG 1.2.0 ) set(HWY_ENABLE_TESTS OFF CACHE BOOL "" FORCE) set(HWY_ENABLE_EXAMPLES OFF CACHE BOOL "" FORCE) set(HWY_ENABLE_CONTRIB OFF CACHE BOOL "" FORCE) FetchContent_MakeAvailable(highway) endif() endif() endif() if(DISPENSO_BUILD_TESTS) enable_testing() add_subdirectory(tests) endif() if(DISPENSO_BUILD_BENCHMARKS) # Sadly any given release of folly seems to have some problem or another. Leave disabled by default. set(BENCHMARK_WITHOUT_FOLLY ON CACHE BOOL "Should folly benchmarks be disabled?") add_subdirectory(benchmarks) endif() if(DISPENSO_BUILD_EXAMPLES) add_subdirectory(examples) endif() ================================================ FILE: CMakePresets.json ================================================ { "version": 6, "cmakeMinimumRequired": { "major": 3, "minor": 21, "patch": 0 }, "configurePresets": [ { "name": "default", "displayName": "Default (system packages)", "description": "Basic build without vcpkg. Uses system-installed dependencies.", "binaryDir": "${sourceDir}/build/${presetName}", "cacheVariables": { "CMAKE_CXX_STANDARD": "17" } }, { "name": "vcpkg", "displayName": "vcpkg", "description": "Use vcpkg for dependencies (TBB, GoogleTest, benchmark, etc.)", "binaryDir": "${sourceDir}/build/${presetName}", "toolchainFile": "$env{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake", "cacheVariables": { "CMAKE_CXX_STANDARD": "17" } }, { "name": "dev", "displayName": "Development (tests + benchmarks)", "description": "Build everything for development: tests, benchmarks, and examples.", "inherits": "vcpkg", "cacheVariables": { "DISPENSO_BUILD_TESTS": "ON", "DISPENSO_BUILD_BENCHMARKS": "ON", "DISPENSO_BUILD_EXAMPLES": "ON", "CMAKE_CXX_STANDARD": "20" } }, { "name": "win-dev", "displayName": "Windows Development (Ninja)", "description": "Windows dev build using Ninja for fast single-config builds. 
Run from a VS Developer Command Prompt.", "inherits": "dev", "binaryDir": "$env{TEMP}/dispenso-build/${presetName}", "generator": "Ninja", "cacheVariables": { "CMAKE_BUILD_TYPE": "Release", "VCPKG_APPLOCAL_DEPS": "OFF" }, "condition": { "type": "equals", "lhs": "${hostSystemName}", "rhs": "Windows" } }, { "name": "asan", "displayName": "Address Sanitizer", "inherits": "dev", "cacheVariables": { "ADDRESS_SANITIZER": "ON" } }, { "name": "tsan", "displayName": "Thread Sanitizer", "inherits": "dev", "cacheVariables": { "THREAD_SANITIZER": "ON" } } ], "buildPresets": [ { "name": "default", "configurePreset": "default", "configuration": "Release" }, { "name": "dev", "configurePreset": "dev", "configuration": "Release" }, { "name": "win-dev", "configurePreset": "win-dev" } ], "testPresets": [ { "name": "dev", "configurePreset": "dev", "configuration": "Release", "output": { "outputOnFailure": true } }, { "name": "win-dev", "configurePreset": "win-dev", "output": { "outputOnFailure": true } } ] } ================================================ FILE: CODE_OF_CONDUCT.md ================================================ # Code of Conduct ## Our Pledge In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. ## Our Standards Examples of behavior that contributes to creating a positive environment include: * Using welcoming and inclusive language * Being respectful of differing viewpoints and experiences * Gracefully accepting constructive criticism * Focusing on what is best for the community * Showing empathy towards other community members Examples of unacceptable behavior by participants include: * The use of sexualized language or imagery and unwelcome sexual attention or advances * Trolling, insulting/derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or electronic address, without explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. ## Scope This Code of Conduct applies within all project spaces, and it also applies when an individual is representing the project or its community in public spaces. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 
## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at . All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html [homepage]: https://www.contributor-covenant.org For answers to common questions about this code of conduct, see https://www.contributor-covenant.org/faq ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing to dispenso We want to make contributing to this project as easy and transparent as possible. There is a design ethos behind the library, so it is recommended to reach out via a GitHub issue on the project to discuss non-trivial changes you may wish to make. These changes include, for example, wanting to change existing API, wanting to furnish a new utility, or wanting to change underlying behavior substantially. Let's avoid situations where you put in a lot of hard work, only to have to change it substantially or get your pull request rejected. ## Our Development Process This library has another home inside Facebook repos. From there it is subjected to regular continuous integration testing on many platforms, and used by many projects. ## Pull Requests We actively welcome your pull requests. 1. Fork the repo and create your branch from `master`. 2. If you've added code that should be tested, add tests. 3. If you've changed APIs, update the documentation. 4. Ensure the test suite passes. 5. Utilize clang-format. 6. If you haven't already, complete the Contributor License Agreement ("CLA"). ## Contributor License Agreement ("CLA") In order to accept your pull request, we need you to submit a CLA. You only need to do this once to work on any of Facebook's open source projects. Complete your CLA here: ## Issues We use GitHub issues to track public bugs. Please ensure your description is clear and has sufficient instructions to be able to reproduce the issue. Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe disclosure of security bugs. In those cases, please go through the process outlined on that page and do not file a public issue. ## Coding Style * 2 spaces for indentation rather than tabs * 100 character line length * Member variables have trailing underscore_ * BigCamelCase for classes and structs, and smallCamelCase for functions and variables (exception is if you are trying to match a substantial part of a standard library interface). * [1TBS braces](https://en.wikipedia.org/wiki/Indentation_style#Variant:_1TBS_(OTBS)) * Most of all, try to be consistent with the surrounding code. We have automated tools that will enforce clang-format style for some files (e.g. the C++ core) once we import your pull request into our internal code reviewing tools. 
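To make the style bullets above concrete, here is a tiny, purely illustrative snippet (the class and names are made up) showing the expected shape of dispenso-style code:

```cpp
// Illustrative only: BigCamelCase type, smallCamelCase functions/variables,
// trailing underscore_ on members, 2-space indent, attached (1TBS) braces.
class WorkTracker {
 public:
  explicit WorkTracker(size_t initialCount) : remainingCount_(initialCount) {}

  void markDone() {
    if (remainingCount_ > 0) {
      --remainingCount_;
    }
  }

  size_t remainingCount() const {
    return remainingCount_;
  }

 private:
  size_t remainingCount_;  // Member variables carry a trailing underscore.
};
```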
## License By contributing to dispenso, you agree that your contributions will be licensed under the LICENSE.md file in the root directory of this source tree. ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) Facebook, Inc. and its affiliates. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ [![Build and test](https://github.com/facebookincubator/dispenso/actions/workflows/build.yml/badge.svg)](https://github.com/facebookincubator/dispenso/actions/workflows/build.yml) [![Documentation](https://img.shields.io/badge/docs-online-blue)](https://facebookincubator.github.io/dispenso) [![codecov](https://codecov.io/gh/facebookincubator/dispenso/branch/main/graph/badge.svg)](https://codecov.io/gh/facebookincubator/dispenso) [![Conan Center](https://img.shields.io/conan/v/dispenso)](https://conan.io/center/recipes/dispenso) [![vcpkg](https://img.shields.io/vcpkg/v/dispenso)](https://vcpkg.io/en/package/dispenso) [![Homebrew](https://img.shields.io/homebrew/v/dispenso)](https://formulae.brew.sh/formula/dispenso) [![MacPorts](https://img.shields.io/badge/macports-dispenso-blue)](https://ports.macports.org/port/dispenso/) [![Anaconda-Server Badge](https://anaconda.org/conda-forge/dispenso/badges/version.svg)](https://anaconda.org/conda-forge/dispenso) # Dispenso **A high-performance C++ thread pool and parallel algorithms library** Dispenso is a modern **C++ parallel computing library** that provides work-stealing thread pools, parallel for loops, futures, task graphs, and concurrent containers. It serves as a powerful **alternative to OpenMP and Intel TBB**, offering better nested parallelism, sanitizer-clean code, and explicit thread pool control. Dispenso is used in hundreds of projects at Meta (formerly Facebook) and has been heavily tested and iterated on in production. 
**Key advantages over OpenMP and TBB:** - **No thread explosion** with nested parallel loops - dispenso's work-stealing prevents deadlocks and oversubscription - **Clean with ASAN/TSAN** - fully sanitizer-compatible, unlike many TBB versions - **Thread-safe shared futures** - `std::experimental::shared_future`-like API that TBB lacks, safe for multiple concurrent waiters, with much better performance than `std::future` - **Portable** - C++14 compatible with no compiler-specific pragmas or extensions; C++20 builds gain concept constraints for clearer error messages ## Table of Contents - [Choose Dispenso If...](#choosedispenso) - [Features](#features) - [Quick Start](#quickstart) - [Comparison vs Other Libraries](#comparison) - [Migration Guides](#migrationguides) - [When Not to Use Dispenso](#nottouse) - [Documentation and Examples](#examples) - [Benchmark Results](#benchresults) - [Installing](#installing) - [Building](#building) - [Known Issues](#knownissues) - [License](#license)
## Choose Dispenso If...

- You need **nested parallelism** without thread explosion (see the sketch after this list)
- You want **sanitizer-clean** (ASAN/TSAN) concurrent code
- You want **explicit control over thread pools** rather than implicit global state
- You need **compute-bound futures**, not I/O-bound async
- You want **stable APIs** and minimal dependencies
- You need **cross-platform portability** from a C++14 baseline
- You have **multiple independent parallel loops** that can overlap (cascading `parallel_for`)
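A minimal sketch of the nested-parallelism point above, using only the `dispenso::parallel_for` call shown in the Quick Start below; both loop levels draw work from the same work-stealing pool:

```cpp
#include <dispenso/parallel_for.h>

#include <vector>

int main() {
  std::vector<std::vector<float>> tiles(64, std::vector<float>(4096, 1.0f));

  // Outer parallel loop over tiles.
  dispenso::parallel_for(size_t{0}, tiles.size(), [&](size_t t) {
    // Inner parallel loop within one tile. Nesting does not spawn an extra
    // pool per outer iteration, so there is no thread explosion or deadlock.
    dispenso::parallel_for(size_t{0}, tiles[t].size(), [&](size_t i) { tiles[t][i] *= 2.0f; });
  });
  return 0;
}
```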
## Features

Dispenso provides a comprehensive set of parallel programming primitives:

**Core runtime:**

* **[`ThreadPool`](https://facebookincubator.github.io/dispenso/classdispenso_1_1_thread_pool.html)** — work-stealing thread pool backing all dispenso parallelism
* **[`TaskSet`](https://facebookincubator.github.io/dispenso/classdispenso_1_1_task_set.html) / [`ConcurrentTaskSet`](https://facebookincubator.github.io/dispenso/classdispenso_1_1_concurrent_task_set.html)** — task grouping with wait, cancellation, and recursive scheduling (see the sketch after this list)

**Parallel algorithms:**

* **[`parallel_for`](docs/getting_started.md#your-first-parallel-loop)** — parallel loops over indices, blocking or non-blocking (cascaded); cascading `parallel_for` enables overlapping independent loops without oversubscription
* **[`for_each`](docs/getting_started.md#parallel-iteration-with-for_each)** — parallel `std::for_each` / `std::for_each_n`
* **[`Future`](docs/getting_started.md#futures-for-async-results)** — high-performance thread-safe shared futures with `then()`, `when_all()`, and an API matching `std::experimental::shared_future`
* **[`Graph`](docs/getting_started.md#task-graphs)** — task graph execution with subgraph support and incremental re-evaluation
* **[`pipeline`](docs/getting_started.md#pipelines)** — parallel pipelining of streaming workloads

**Concurrent containers and synchronization:**

* **[`ConcurrentVector`](docs/getting_started.md#concurrentvector)** — concurrent growable vector, superset of TBB `concurrent_vector` API
* **[`Latch`](docs/getting_started.md#latch)** — one-shot barrier for thread synchronization
* **[`RWLock`](https://facebookincubator.github.io/dispenso/classdispenso_1_1_r_w_lock.html)** — reader-writer spin lock, outperforms `std::shared_mutex` under low write contention
* **`SPSCRingBuffer`** — lock-free single-producer single-consumer ring buffer *(1.5.0)*

**General-purpose utilities:**

* **`SmallVector`** — inline-storage vector (not thread-aware; similar to `folly::small_vector`) *(1.5.0)*
* **`OnceFunction`** — lightweight move-only `void()` callable
* **`PoolAllocator`** — pool allocator with pluggable backing allocation (e.g. CUDA)
* **`SmallBufferAllocator`** — fast concurrent allocation for temporary objects
* **[`ResourcePool`](docs/getting_started.md#resource-pooling)** — semaphore-like guard around pooled resources
* **`CompletionEvent`** — notifiable event with wait and timed wait
* **`AsyncRequest`** — lightweight constrained message passing
* **`ConcurrentObjectArena`** — fast same-type object arena
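A brief sketch of the core runtime pieces (`ThreadPool` plus `TaskSet`) referenced above; the task bodies are placeholders, and the full API is in the linked docs:

```cpp
#include <dispenso/task_set.h>
#include <dispenso/thread_pool.h>

#include <atomic>
#include <iostream>

int main() {
  std::atomic<int> completed{0};

  // Group related work in a TaskSet so it can be waited on (or cancelled) as a unit.
  dispenso::TaskSet tasks(dispenso::globalThreadPool());
  for (int i = 0; i < 8; ++i) {
    tasks.schedule([&completed]() { completed.fetch_add(1, std::memory_order_relaxed); });
  }

  // Blocks until every scheduled task has finished (the calling thread may help run them).
  tasks.wait();
  std::cout << "completed " << completed.load() << " tasks\n";
  return 0;
}
```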
## Quick Start

**Parallel for loop** - the most common use case:

```cpp
#include <dispenso/parallel_for.h>

// Sequential
for (size_t i = 0; i < N; ++i) {
  process(data[i]);
}

// Parallel with dispenso - just wrap it!
dispenso::parallel_for(0, N, [&](size_t i) { process(data[i]); });
```

**Install via your favorite package manager:**

```bash
# Conda
conda install -c conda-forge dispenso

# Fedora/RHEL
sudo dnf install dispenso-devel

# Or build from source (see below)
```
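For very small or very cheap loops (the regime where the OpenMP comparison below notes a compiler-support advantage), `ParForOptions::minItemsPerChunk` keeps chunks large enough to amortize scheduling overhead. A minimal sketch, assuming the options-taking overload of `parallel_for`; the value 512 is illustrative:

```cpp
#include <dispenso/parallel_for.h>

#include <vector>

void scaleCheaply(std::vector<float>& data) {
  dispenso::ParForOptions opts;
  // Require at least 512 elements per chunk so trivially cheap iterations
  // aren't swamped by task-dispatch overhead (512 is an illustrative value).
  opts.minItemsPerChunk = 512;

  dispenso::parallel_for(
      size_t{0}, data.size(), [&data](size_t i) { data[i] *= 2.0f; }, opts);
}
```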
## Comparison vs Other Libraries

### TBB (Intel Threading Building Blocks)

TBB has more functionality overall, but we built dispenso for three reasons:

1. **Sanitizer compatibility** — TBB doesn't work well with ASAN/TSAN
2. **Thread-safe shared futures** — TBB lacks a futures interface; dispenso provides `std::experimental::shared_future`-like futures safe for multiple concurrent waiters
3. **Non-Intel hardware** — we needed to control performance on diverse platforms

**Performance:** Dispenso tends to be faster for small and medium parallel loops, and on par for large ones. When many loops run independently, dispenso's cascading `parallel_for` avoids oversubscription and has delivered **32-50% speedups in production workloads** after porting from TBB at Meta. TBB lacks an equivalent mechanism.

See [Migrating from TBB](docs/migrating_from_tbb.md) for a step-by-step porting guide.

### OpenMP

OpenMP has simple syntax for basic loops but grows complex for advanced constructs. Nested `#pragma omp parallel for` inside threaded code risks thread explosion and machine exhaustion.

Dispenso outperforms OpenMP for medium and large loops. OpenMP has an advantage for very small loops due to direct compiler support, though dispenso's `minItemsPerChunk` option can close this gap by tuning the parallelism threshold for small/fast loops.

See [Migrating from OpenMP](docs/migrating_from_openmp.md) for a step-by-step porting guide.

### Folly

Folly excels at asynchronous I/O with coroutine support. Dispenso is designed for **compute-bound** work. Dispenso's futures are lighter-weight and faster for compute workloads; Folly is the better choice for I/O-heavy applications.

### TaskFlow

TaskFlow focuses on task graph execution. Dispenso has faster graph construction, faster full and partial graph execution, much lower `parallel_for` overhead (10-100x in benchmarks), and simpler/faster pipeline construction. TaskFlow does offer CUDA graph mappings, which dispenso does not currently provide.

### Others (GCD, C++ std parallelism)

GCD is Apple-specific with ports to other platforms. C++ parallel algorithms are still evolving — we are interested in enabling dispenso as a backend for `std::execution` and C++ coroutines. Contributions and benchmarks are welcome.
### Migration Guides

- **[Migrating from TBB](docs/migrating_from_tbb.md)** — API mappings, thread pool differences, and common porting patterns
- **[Migrating from OpenMP](docs/migrating_from_openmp.md)** — replacing `#pragma omp` with dispenso equivalents, handling reductions and nested parallelism (a reduction sketch follows below)
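As a taste of what the OpenMP guide covers, a `#pragma omp parallel for reduction(+:sum)` loop can be expressed with dispenso's chunked ranges by accumulating a per-chunk partial sum and combining under a mutex. This is a minimal sketch using only the primitives shown elsewhere in this README, not the guide's exact recipe; the data layout is illustrative.

```cpp
#include <dispenso/parallel_for.h>

#include <mutex>
#include <vector>

// Roughly equivalent to:
//   #pragma omp parallel for reduction(+:sum)
//   for (size_t i = 0; i < data.size(); ++i) sum += data[i];
double sumAll(const std::vector<double>& data) {
  double sum = 0.0;
  std::mutex mtx;

  dispenso::parallel_for(
      dispenso::makeChunkedRange(size_t{0}, data.size(), dispenso::ParForChunking::kStatic),
      [&](size_t i, size_t end) {
        double partial = 0.0;  // accumulate locally; touch the shared sum once per chunk
        for (; i != end; ++i) {
          partial += data[i];
        }
        std::lock_guard<std::mutex> lock(mtx);
        sum += partial;
      });
  return sum;
}
```

The migration guide covers reductions (and lower-overhead alternatives to the mutex) in more detail.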
## When Not to Use Dispenso

Dispenso isn't designed for high-latency task offload; it works best for compute-bound tasks. Using the thread pool for networking, disk I/O, or workloads with frequent TLB misses (really any scenario involving kernel context switches) may result in less than ideal performance. In these scenarios, `dispenso::Future` can be used with `dispenso::NewThreadInvoker` (see the sketch below), which should be roughly equivalent to `std::future` in performance. If you need async I/O, Folly is likely a better choice (though it still doesn't fix e.g. TLB misses).
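A sketch of that escape hatch, assuming `dispenso::async` accepts a schedulable such as `NewThreadInvoker` as its first argument (check `dispenso/future.h` and `dispenso/schedulable.h` for the exact overloads); `loadBlobFromDisk` is a hypothetical stand-in for a blocking call:

```cpp
#include <dispenso/future.h>
#include <dispenso/schedulable.h>

#include <string>

// Hypothetical blocking I/O; anything that spends its time in kernel waits qualifies.
std::string loadBlobFromDisk(const std::string& path) {
  return "contents of " + path;  // stand-in for a real read
}

dispenso::Future<std::string> loadAsync(const std::string& path) {
  // Run on a freshly spawned thread instead of the compute pool, so a long
  // kernel wait doesn't stall pool workers.
  dispenso::NewThreadInvoker invoker;
  return dispenso::async(invoker, [path]() { return loadBlobFromDisk(path); });
}
```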
## Documentation and Examples

[Documentation can be found here](https://facebookincubator.github.io/dispenso)

Here are some simple examples of what you can do in dispenso. See the tests and benchmarks for more examples.

### parallel\_for

A simple sequential loop can be parallelized with minimal changes:

```cpp
for (size_t j = 0; j < kLoops; ++j) {
  vec[j] = someFunction(j);
}
```

Becomes:

```cpp
dispenso::parallel_for(0, kLoops, [&vec](size_t j) { vec[j] = someFunction(j); });
```

### TaskSet

Schedule multiple tasks and wait for them to complete:

```cpp
void randomWorkConcurrently() {
  dispenso::TaskSet tasks(dispenso::globalThreadPool());
  tasks.schedule([&stateA]() { stateA = doA(); });
  tasks.schedule([]() { doB(); });
  // Do some work on current thread
  tasks.wait();
  // After this, A, B done.
  tasks.schedule(doC);
  tasks.schedule([&stateD]() { doD(stateD); });
}
// TaskSet's destructor waits for all scheduled tasks to finish
```

### ConcurrentTaskSet

Build a tree in parallel using recursive task scheduling:

```cpp
struct Node {
  int val;
  std::unique_ptr<Node> left, right;
};

void buildTree(dispenso::ConcurrentTaskSet& tasks, std::unique_ptr<Node>& node, int depth) {
  if (depth) {
    node = std::make_unique<Node>();
    node->val = depth;
    tasks.schedule(
        [&tasks, &left = node->left, depth]() { buildTree(tasks, left, depth - 1); });
    tasks.schedule(
        [&tasks, &right = node->right, depth]() { buildTree(tasks, right, depth - 1); });
  }
}

void buildTreeParallel() {
  std::unique_ptr<Node> root;
  dispenso::ConcurrentTaskSet tasks(dispenso::globalThreadPool());
  buildTree(tasks, root, 20);
  tasks.wait();
  // tasks would also wait here in destructor if we omitted this line
}
```

### Future

Compose asynchronous operations with futures:

```cpp
dispenso::Future<size_t> ThingProcessor::processThings() {
  auto expensiveFuture = dispenso::async([this]() { return processExpensiveThing(expensive_); });
  auto futureOfManyCheap = dispenso::async([this]() {
    size_t sum = 0;
    for (auto& thing : cheapThings_) {
      sum += processCheapThing(thing);
    }
    return sum;
  });
  return dispenso::when_all(expensiveFuture, futureOfManyCheap).then([](auto&& tuple) {
    return std::get<0>(tuple).get() + std::get<1>(tuple).get();
  });
}

auto result = thingProc->processThings();
useResult(result.get());
```

### ConcurrentVector

Safely grow a vector from multiple threads:

```cpp
dispenso::ConcurrentVector<std::unique_ptr<int>> values;
dispenso::parallel_for(
    dispenso::makeChunkedRange(0, length, dispenso::ParForChunking::kStatic),
    [&values](int i, int end) {
      values.grow_by_generator(end - i, [i]() mutable { return std::make_unique<int>(i++); });
    });
```
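### for\_each

A small sketch of `for_each`, assuming the iterator-range overload mirrors `std::for_each` (see the `for_each` docs linked in the Features section); `normalize` is an illustrative helper:

```cpp
#include <dispenso/for_each.h>

#include <vector>

// Illustrative per-element transform.
inline void normalize(float& v) {
  v = v / 255.0f;
}

void normalizeAll(std::vector<float>& pixels) {
  // Parallel counterpart of std::for_each(pixels.begin(), pixels.end(), ...).
  dispenso::for_each(pixels.begin(), pixels.end(), [](float& v) { normalize(v); });
}
```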
## Benchmark Results

Dispenso is benchmarked across Linux (x64), macOS (ARM64), Windows (x64), and Android (ARM64), comparing against OpenMP, TBB, TaskFlow, folly, and `std::async` across thread pools, parallel loops, futures, graphs, concurrent containers, and more.

**[Interactive Benchmark Dashboard](https://facebookincubator.github.io/dispenso/benchmarks/)** — explore all results with platform switching, dark/light theme, and detailed per-benchmark charts.
## Installing

Binary builds of Dispenso are available through several package managers:

- **Conda**: `conda install -c conda-forge dispenso`
- **Conan**: `conan install --requires=dispenso/1.5.0`
- **vcpkg**: `vcpkg install dispenso`
- **Homebrew**: `brew install dispenso`
- **MacPorts**: `sudo port install dispenso`
- **Fedora/RHEL**: `sudo dnf install dispenso-devel`

If your platform is not on the list, see [the next section](#building) for instructions to build from source.

[![Packaging status](https://repology.org/badge/vertical-allrepos/dispenso.svg)](https://repology.org/project/dispenso/versions)
## Building

**Linux and macOS:**

```bash
mkdir build && cd build
cmake PATH_TO_DISPENSO_ROOT
make -j
```

**Windows** (from a Developer Command Prompt):

```bash
mkdir build && cd build
cmake PATH_TO_DISPENSO_ROOT
cmake --build . --config Release
```

For detailed instructions including CMake prerequisites, installation, testing, and benchmarking, see [docs/building.md](docs/building.md).
## Known Issues

* A subset of dispenso tests is known to fail on 32-bit PPC Mac. If you have access to such a machine and are willing to help debug, it would be appreciated!

## TODO

* Enable Windows benchmarks through CMake. *(may be resolved soon — actively being worked on)*
## License The library is released under the MIT license, but also relies on the (excellent) moodycamel concurrentqueue library, which is released under the Simplified BSD and Zlib licenses. See the top of the source at `dispenso/third-party/moodycamel/*.h` for details. ================================================ FILE: benchmarks/CMakeLists.txt ================================================ # Copyright (c) Meta Platforms, Inc. and affiliates. # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. cmake_minimum_required(VERSION 3.12) # Try to find Taskflow: system install, DISPENSO_DEPS_DIR, ~/dispenso_deps, then FetchContent. find_package(Taskflow QUIET) if(NOT Taskflow_FOUND) # Check well-known local directories for a Taskflow source tree. # Searches for both "taskflow" and versioned names like "taskflow-3.11.0". if(DEFINED DISPENSO_DEPS_DIR) file(GLOB _taskflow_dep_candidates "${DISPENSO_DEPS_DIR}/taskflow*") list(APPEND _taskflow_search_dirs ${_taskflow_dep_candidates}) endif() file(GLOB _taskflow_home_candidates "$ENV{HOME}/dispenso_deps/taskflow*") list(APPEND _taskflow_search_dirs ${_taskflow_home_candidates}) set(_taskflow_local_dir "") foreach(_dir ${_taskflow_search_dirs}) if(EXISTS "${_dir}/taskflow/taskflow.hpp") set(_taskflow_local_dir "${_dir}") break() endif() endforeach() if(_taskflow_local_dir) message(STATUS "Found local Taskflow at ${_taskflow_local_dir}") add_library(taskflow INTERFACE) target_include_directories(taskflow INTERFACE "${_taskflow_local_dir}") else() message(STATUS "Taskflow not found locally, fetching from GitHub...") include(FetchContent) message(STATUS "Using up-to-date taskflow") FetchContent_Declare( taskflow GIT_REPOSITORY https://github.com/taskflow/taskflow.git GIT_TAG v3.6.0 CONFIGURE_COMMAND "" BUILD_COMMAND "" ) FetchContent_GetProperties(taskflow) if(NOT taskflow_POPULATED) FetchContent_Populate(taskflow) endif() FetchContent_MakeAvailable(taskflow) add_library(taskflow INTERFACE) target_include_directories(taskflow INTERFACE ${taskflow_SOURCE_DIR}) endif() else() message(STATUS "Found system Taskflow") if(NOT TARGET taskflow) add_library(taskflow INTERFACE) target_link_libraries(taskflow INTERFACE Taskflow::Taskflow) endif() endif() find_package(benchmark QUIET) if(NOT benchmark_FOUND) message(STATUS "Google Benchmark not found locally, fetching from GitHub...") include(FetchContent) FetchContent_Declare( benchmark GIT_REPOSITORY https://github.com/google/benchmark.git GIT_TAG v1.8.3 ) set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark tests") set(BENCHMARK_ENABLE_GTEST_TESTS OFF CACHE BOOL "Disable gtest tests") FetchContent_MakeAvailable(benchmark) else() message(STATUS "Found system Google Benchmark") endif() # OpenMP support (including Windows/MSVC) find_package(OpenMP) # TBB support - prefer CONFIG mode to find TBBConfig.cmake from vcpkg/modern installs find_package(TBB CONFIG QUIET) if(NOT TBB_FOUND) # Fall back to old FindTBB.cmake module find_package(TBB QUIET) endif() find_package(folly) if (WIN32) set (REQUIRED_LIBS dispenso benchmark::benchmark benchmark::benchmark_main taskflow) else (WIN32) set (REQUIRED_LIBS dispenso benchmark::benchmark benchmark::benchmark_main pthread taskflow) endif (WIN32) if (TBB_FOUND) # Handle both old TBB (lowercase 'tbb' target) and new oneTBB (TBB::tbb target) if(TARGET TBB::tbb) set (OPTIONAL_LIBS ${OPTIONAL_LIBS} TBB::tbb) elseif(TARGET tbb) set (OPTIONAL_LIBS ${OPTIONAL_LIBS} tbb) else() set (OPTIONAL_LIBS 
${OPTIONAL_LIBS} TBB::tbb) endif() else (TBB_FOUND) add_compile_definitions(BENCHMARK_WITHOUT_TBB) endif (TBB_FOUND) if (OpenMP_CXX_FOUND) set (OPTIONAL_LIBS ${OPTIONAL_LIBS} OpenMP::OpenMP_CXX) endif (OpenMP_CXX_FOUND) if (FOLLY_LIBRARIES AND NOT ${BENCHMARK_WITHOUT_FOLLY}) find_package(gflags) set (OPTIONAL_LIBS ${OPTIONAL_LIBS} ${FOLLY_LIBRARIES}) else (FOLLY_LIBRARIES AND NOT ${BENCHMARK_WITHOUT_FOLLY}) add_compile_definitions(BENCHMARK_WITHOUT_FOLLY) endif (FOLLY_LIBRARIES AND NOT ${BENCHMARK_WITHOUT_FOLLY}) file(GLOB BENCHMARK_FILES CONFIGURE_DEPENDS ${PROJECT_SOURCE_DIR}/benchmarks/*.cpp) foreach(BENCHMARK_FILE ${BENCHMARK_FILES}) set(BENCHMARK_NAME) get_filename_component(BENCHMARK_NAME ${BENCHMARK_FILE} NAME_WE) add_executable(${BENCHMARK_NAME} ${BENCHMARK_FILE}) target_compile_features(${BENCHMARK_NAME} PRIVATE cxx_std_20) target_link_libraries(${BENCHMARK_NAME} ${REQUIRED_LIBS} ${OPTIONAL_LIBS}) endforeach() if(DISPENSO_BUILD_FAST_MATH) add_subdirectory(fast_math) endif() ================================================ FILE: benchmarks/benchmark_common.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #include #if defined(__GNUC__) || defined(__clang__) #define UNUSED_VAR myLocalForLoopVar __attribute__((unused)) #elif defined(_MSC_VER) #define UNUSED_VAR myLocalForLoopVar __pragma(warning(suppress : 4100)) #else #define UNUSED_VAR myLocalForLoopVar #endif ================================================ FILE: benchmarks/cascading_parallel_for_benchmark.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * Benchmark demonstrating dispenso's cascading parallel_for advantage. * * When multiple independent parallel_for loops need to run, dispenso can * overlap them on a shared TaskSet using ParForOptions{.wait = false}. * TBB and OpenMP each impose an implicit barrier per parallel_for call, * forcing sequential execution of independent loops. */ #include #if defined(_OPENMP) #include #endif #include #include #if !defined(BENCHMARK_WITHOUT_TBB) #include "tbb/blocked_range.h" #include "tbb/parallel_for.h" #include "tbb/task_group.h" #include "tbb_compat.h" #endif // !BENCHMARK_WITHOUT_TBB #include "thread_benchmark_common.h" static constexpr int32_t kSmallSize = 1000; static constexpr int32_t kMediumSize = 100000; static constexpr int32_t kLargeSize = 10000000; static constexpr int32_t kNumLoops = 8; // Minimum work per chunk to amortize scheduling overhead for cheap lambdas. // With trivial compute (~4 integer ops ≈ 2ns/element), 512 elements ≈ 1µs // of work, comfortably covering task dispatch cost on Windows. 
static constexpr uint32_t kMinItemsPerChunk = 512; static uint32_t kSeed(42); inline int32_t compute(int32_t x) { return x * x - 3 * x + 7; } inline int32_t fuse(const std::array& values) { int32_t result = 0; for (int32_t k = 0; k < kNumLoops; ++k) { result += values[static_cast(k)]; } return result; } struct BenchArrays { std::array, kNumLoops> inputs; std::array, kNumLoops> outputs; std::vector result; }; BenchArrays& getArrays(int32_t numElements) { static std::unordered_map arrays; auto it = arrays.find(numElements); if (it != arrays.end()) { return it->second; } srand(kSeed); BenchArrays ba; for (int32_t k = 0; k < kNumLoops; ++k) { ba.inputs[static_cast(k)].reserve(static_cast(numElements)); for (int32_t i = 0; i < numElements; ++i) { ba.inputs[static_cast(k)].push_back((rand() & 255) - 127); } ba.outputs[static_cast(k)].resize(static_cast(numElements), 0); } ba.result.resize(static_cast(numElements), 0); auto res = arrays.emplace(numElements, std::move(ba)); assert(res.second); return res.first->second; } void checkResults(BenchArrays& ba, int32_t numElements) { for (int32_t i = 0; i < numElements; ++i) { auto idx = static_cast(i); std::array expected; for (int32_t k = 0; k < kNumLoops; ++k) { expected[static_cast(k)] = compute(ba.inputs[static_cast(k)][idx]); } int32_t expectedFused = fuse(expected); if (ba.result[idx] != expectedFused) { std::cerr << "FAIL at index " << i << ": got " << ba.result[idx] << " expected " << expectedFused << std::endl; abort(); } } } void BM_serial(benchmark::State& state) { const int32_t numElements = state.range(0); auto& ba = getArrays(numElements); for (auto UNUSED_VAR : state) { for (int32_t k = 0; k < kNumLoops; ++k) { auto kk = static_cast(k); for (int32_t i = 0; i < numElements; ++i) { ba.outputs[kk][static_cast(i)] = compute(ba.inputs[kk][static_cast(i)]); } } for (int32_t i = 0; i < numElements; ++i) { auto idx = static_cast(i); std::array vals; for (int32_t k = 0; k < kNumLoops; ++k) { vals[static_cast(k)] = ba.outputs[static_cast(k)][idx]; } ba.result[idx] = fuse(vals); } } checkResults(ba, numElements); } void BM_dispenso_blocking(benchmark::State& state) { const int32_t numThreads = state.range(0) - 1; const int32_t numElements = state.range(1); auto& ba = getArrays(numElements); dispenso::ThreadPool pool(numThreads); dispenso::ParForOptions opts; opts.minItemsPerChunk = kMinItemsPerChunk; for (auto UNUSED_VAR : state) { dispenso::TaskSet tasks(pool); for (int32_t k = 0; k < kNumLoops; ++k) { auto kk = static_cast(k); dispenso::parallel_for( tasks, 0, numElements, [&inputs = ba.inputs[kk], &outputs = ba.outputs[kk]](int32_t i) { outputs[static_cast(i)] = compute(inputs[static_cast(i)]); }, opts); } dispenso::parallel_for( tasks, 0, numElements, [&ba](int32_t i) { auto idx = static_cast(i); std::array vals; for (int32_t k = 0; k < kNumLoops; ++k) { vals[static_cast(k)] = ba.outputs[static_cast(k)][idx]; } ba.result[idx] = fuse(vals); }, opts); } checkResults(ba, numElements); } void BM_dispenso_cascaded(benchmark::State& state) { const int32_t numThreads = state.range(0) - 1; const int32_t numElements = state.range(1); auto& ba = getArrays(numElements); dispenso::ThreadPool pool(numThreads); dispenso::ParForOptions noWait; noWait.wait = false; noWait.minItemsPerChunk = kMinItemsPerChunk; dispenso::ParForOptions opts; opts.minItemsPerChunk = kMinItemsPerChunk; for (auto UNUSED_VAR : state) { dispenso::TaskSet tasks(pool); // First N-1 loops: non-blocking, returns immediately for (int32_t k = 0; k < kNumLoops - 1; ++k) { auto kk = 
static_cast(k); dispenso::parallel_for( tasks, 0, numElements, [&inputs = ba.inputs[kk], &outputs = ba.outputs[kk]](int32_t i) { outputs[static_cast(i)] = compute(inputs[static_cast(i)]); }, noWait); } // Last independent loop: blocking — calling thread participates, // implicitly waits for all prior non-blocking loops too { constexpr auto kk = static_cast(kNumLoops - 1); dispenso::parallel_for( tasks, 0, numElements, [&inputs = ba.inputs[kk], &outputs = ba.outputs[kk]](int32_t i) { outputs[static_cast(i)] = compute(inputs[static_cast(i)]); }, opts); } // Fusion: blocking (depends on all outputs being complete) dispenso::parallel_for( tasks, 0, numElements, [&ba](int32_t i) { auto idx = static_cast(i); std::array vals; for (int32_t k = 0; k < kNumLoops; ++k) { vals[static_cast(k)] = ba.outputs[static_cast(k)][idx]; } ba.result[idx] = fuse(vals); }, opts); } checkResults(ba, numElements); } #if defined(_OPENMP) void BM_omp(benchmark::State& state) { const int32_t numThreads = state.range(0); const int32_t numElements = state.range(1); auto& ba = getArrays(numElements); omp_set_num_threads(numThreads); for (auto UNUSED_VAR : state) { for (int32_t k = 0; k < kNumLoops; ++k) { auto kk = static_cast(k); #pragma omp parallel for for (int32_t i = 0; i < numElements; ++i) { ba.outputs[kk][static_cast(i)] = compute(ba.inputs[kk][static_cast(i)]); } } #pragma omp parallel for for (int32_t i = 0; i < numElements; ++i) { auto idx = static_cast(i); std::array vals; for (int32_t k = 0; k < kNumLoops; ++k) { vals[static_cast(k)] = ba.outputs[static_cast(k)][idx]; } ba.result[idx] = fuse(vals); } } checkResults(ba, numElements); } #endif /*defined(_OPENMP)*/ #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb(benchmark::State& state) { const int32_t numThreads = state.range(0); const int32_t numElements = state.range(1); auto& ba = getArrays(numElements); for (auto UNUSED_VAR : state) { tbb_compat::task_scheduler_init initsched(numThreads); for (int32_t k = 0; k < kNumLoops; ++k) { auto kk = static_cast(k); tbb::parallel_for( tbb::blocked_range(0, numElements), [&inputs = ba.inputs[kk], &outputs = ba.outputs[kk]](const tbb::blocked_range& r) { for (int32_t i = r.begin(); i != r.end(); ++i) { outputs[static_cast(i)] = compute(inputs[static_cast(i)]); } }); } tbb::parallel_for( tbb::blocked_range(0, numElements), [&ba](const tbb::blocked_range& r) { for (int32_t i = r.begin(); i != r.end(); ++i) { auto idx = static_cast(i); std::array vals; for (int32_t k = 0; k < kNumLoops; ++k) { vals[static_cast(k)] = ba.outputs[static_cast(k)][idx]; } ba.result[idx] = fuse(vals); } }); } checkResults(ba, numElements); } // TBB with task_group: launch all independent parallel_for loops concurrently // via task_group, then wait — emulating dispenso's cascading behavior. 
void BM_tbb_task_group(benchmark::State& state) { const int32_t numThreads = state.range(0); const int32_t numElements = state.range(1); auto& ba = getArrays(numElements); for (auto UNUSED_VAR : state) { tbb_compat::task_scheduler_init initsched(numThreads); tbb::task_group tg; for (int32_t k = 0; k < kNumLoops; ++k) { auto kk = static_cast(k); tg.run([&inputs = ba.inputs[kk], &outputs = ba.outputs[kk], numElements]() { tbb::parallel_for( tbb::blocked_range(0, numElements), [&inputs, &outputs](const tbb::blocked_range& r) { for (int32_t i = r.begin(); i != r.end(); ++i) { outputs[static_cast(i)] = compute(inputs[static_cast(i)]); } }); }); } tg.wait(); tbb::parallel_for( tbb::blocked_range(0, numElements), [&ba](const tbb::blocked_range& r) { for (int32_t i = r.begin(); i != r.end(); ++i) { auto idx = static_cast(i); std::array vals; for (int32_t k = 0; k < kNumLoops; ++k) { vals[static_cast(k)] = ba.outputs[static_cast(k)][idx]; } ba.result[idx] = fuse(vals); } }); } checkResults(ba, numElements); } #endif // !BENCHMARK_WITHOUT_TBB static void CustomArguments(benchmark::internal::Benchmark* b) { for (int j : {kSmallSize, kMediumSize, kLargeSize}) { for (int i : pow2HalfStepThreads()) { b->Args({i, j}); } } } BENCHMARK(BM_serial)->Args({kSmallSize})->Args({kMediumSize})->Args({kLargeSize})->UseRealTime(); #if defined(_OPENMP) BENCHMARK(BM_omp)->Apply(CustomArguments)->UseRealTime(); #endif // OPENMP #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb)->Apply(CustomArguments)->UseRealTime(); BENCHMARK(BM_tbb_task_group)->Apply(CustomArguments)->UseRealTime(); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso_blocking)->Apply(CustomArguments)->UseRealTime(); BENCHMARK(BM_dispenso_cascaded)->Apply(CustomArguments)->UseRealTime(); BENCHMARK_MAIN(); ================================================ FILE: benchmarks/concurrent_vector_benchmark.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
*/ #include #include #include #include #include #if !defined(BENCHMARK_WITHOUT_TBB) #include "tbb/concurrent_vector.h" #endif // !BENCHMARK_WITHOUT_TBB #include #include #include "thread_benchmark_common.h" constexpr size_t kLength = (1 << 20); void checkIotaSum(int64_t sum) { if (sum != (static_cast(kLength - 1) * kLength) / 2) { std::cout << sum << " vs " << ((kLength - 1) * kLength) / 2 << std::endl; std::abort(); } } template void checkIotaSum(const Cont& c, int64_t sum) { if (sum != (static_cast(kLength - 1) * kLength) / 2) { std::cout << sum << " vs " << ((kLength - 1) * kLength) / 2 << std::endl; std::vector accountedFor(kLength); for (auto v : c) { accountedFor[v] = 1; } for (size_t i = 0; i < kLength; ++i) { if (!accountedFor[i]) { std::cout << "missing " << i << std::endl; } } std::abort(); } } template void pushBackImpl(benchmark::State& state, ContainerInit containerInit) { for (auto UNUSED_VAR : state) { auto values = containerInit(); for (size_t i = 0; i < kLength; ++i) { values.push_back(i); } } } #if !defined(BENCHMARK_WITHOUT_TBB) template void pushBackGrowByAlternativeTbb(benchmark::State& state, ContainerInit containerInit) { for (auto UNUSED_VAR : state) { auto values = containerInit(); auto it = values.grow_by(kLength); auto end = values.end(); size_t i = 0; for (; it != end; ++it) { *it = i++; } } } #endif // !BENCHMARK_WITHOUT_TBB template void pushBackGrowByAlternativeDispenso(benchmark::State& state, ContainerInit containerInit) { for (auto UNUSED_VAR : state) { auto values = containerInit(); values.grow_by_generator(kLength, [i = size_t{0}]() mutable { return i++; }); } } void BM_std_push_back_serial(benchmark::State& state) { pushBackImpl(state, []() { return std::vector(); }); } void BM_deque_push_back_serial(benchmark::State& state) { pushBackImpl(state, []() { return std::deque(); }); } #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb_push_back_serial(benchmark::State& state) { pushBackImpl(state, []() { return tbb::concurrent_vector(); }); } #endif // !BENCHMARK_WITHOUT_TBB void BM_dispenso_push_back_serial(benchmark::State& state) { pushBackImpl(state, []() { return dispenso::ConcurrentVector(); }); } #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb_push_back_serial_grow_by_alternative(benchmark::State& state) { pushBackGrowByAlternativeTbb(state, []() { return tbb::concurrent_vector(); }); } #endif // !BENCHMARK_WITHOUT_TBB void BM_dispenso_push_back_serial_grow_by_alternative(benchmark::State& state) { pushBackGrowByAlternativeDispenso(state, []() { return dispenso::ConcurrentVector(); }); } void BM_std_push_back_serial_reserve(benchmark::State& state) { pushBackImpl(state, []() { std::vector v; v.reserve(kLength); return v; }); } #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb_push_back_serial_reserve(benchmark::State& state) { pushBackImpl(state, []() { tbb::concurrent_vector v; v.reserve(kLength); return v; }); } #endif // !BENCHMARK_WITHOUT_TBB void BM_dispenso_push_back_serial_reserve(benchmark::State& state) { pushBackImpl( state, []() { return dispenso::ConcurrentVector(kLength, dispenso::ReserveTag); }); } #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb_push_back_serial_grow_by_alternative_reserve(benchmark::State& state) { pushBackGrowByAlternativeTbb(state, []() { tbb::concurrent_vector v; v.reserve(kLength); return v; }); } #endif // !BENCHMARK_WITHOUT_TBB void BM_dispenso_push_back_serial_grow_by_alternative_reserve(benchmark::State& state) { pushBackGrowByAlternativeDispenso( state, []() { return dispenso::ConcurrentVector(kLength, 
dispenso::ReserveTag); }); } template void iterateImpl(benchmark::State& state, ContainerInit containerInit) { auto values = containerInit(); for (size_t i = 0; i < kLength; ++i) { values.push_back(i); } int64_t sum; for (auto UNUSED_VAR : state) { sum = 0; for (auto i : values) { sum += i; } } checkIotaSum(sum); } void BM_std_iterate(benchmark::State& state) { iterateImpl(state, []() { return std::vector(); }); } void BM_deque_iterate(benchmark::State& state) { iterateImpl(state, []() { return std::deque(); }); } #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb_iterate(benchmark::State& state) { iterateImpl(state, []() { return tbb::concurrent_vector(); }); } #endif // !BENCHMARK_WITHOUT_TBB void BM_dispenso_iterate(benchmark::State& state) { iterateImpl(state, []() { return dispenso::ConcurrentVector(); }); } template struct ReverseWrapper { T& iterable; }; template auto begin(ReverseWrapper w) { return std::rbegin(w.iterable); } template auto end(ReverseWrapper w) { return std::rend(w.iterable); } template ReverseWrapper reverse(T&& iterable) { return {iterable}; } template void iterateReverseImpl(benchmark::State& state, ContainerInit containerInit) { auto values = containerInit(); for (size_t i = 0; i < kLength; ++i) { values.push_back(i); } int64_t sum; for (auto UNUSED_VAR : state) { sum = 0; for (auto i : reverse(values)) { sum += i; } } checkIotaSum(sum); } void BM_std_iterate_reverse(benchmark::State& state) { iterateReverseImpl(state, []() { return std::vector(); }); } void BM_deque_iterate_reverse(benchmark::State& state) { iterateReverseImpl(state, []() { return std::deque(); }); } #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb_iterate_reverse(benchmark::State& state) { iterateReverseImpl(state, []() { return tbb::concurrent_vector(); }); } #endif // !BENCHMARK_WITHOUT_TBB void BM_dispenso_iterate_reverse(benchmark::State& state) { iterateReverseImpl(state, []() { return dispenso::ConcurrentVector(); }); } template void lowerBoundImpl(benchmark::State& state, ContainerInit containerInit) { auto values = containerInit(); for (size_t i = 0; i < kLength; ++i) { values.push_back(i); } int64_t sum; for (auto UNUSED_VAR : state) { sum = 0; for (size_t i = 0; i < kLength; ++i) { sum += std::lower_bound(std::begin(values), std::end(values), i) - std::begin(values); } } checkIotaSum(sum); } void BM_std_lower_bound(benchmark::State& state) { lowerBoundImpl(state, []() { return std::vector(); }); } void BM_deque_lower_bound(benchmark::State& state) { lowerBoundImpl(state, []() { return std::deque(); }); } #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb_lower_bound(benchmark::State& state) { lowerBoundImpl(state, []() { return tbb::concurrent_vector(); }); } #endif // !BENCHMARK_WITHOUT_TBB void BM_dispenso_lower_bound(benchmark::State& state) { lowerBoundImpl(state, []() { return dispenso::ConcurrentVector(); }); } template void indexImpl(benchmark::State& state, ContainerInit containerInit) { auto values = containerInit(); for (size_t i = 0; i < kLength; ++i) { values.push_back(i); } int64_t sum; for (auto UNUSED_VAR : state) { sum = 0; size_t len = values.size(); for (size_t i = 0; i < len; ++i) { sum += values[i]; } } checkIotaSum(sum); } void BM_std_index(benchmark::State& state) { indexImpl(state, []() { return std::vector(); }); } void BM_deque_index(benchmark::State& state) { indexImpl(state, []() { return std::deque(); }); } #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb_index(benchmark::State& state) { indexImpl(state, []() { return tbb::concurrent_vector(); }); } #endif // 
!BENCHMARK_WITHOUT_TBB void BM_dispenso_index(benchmark::State& state) { indexImpl(state, []() { return dispenso::ConcurrentVector(); }); } template void randomImpl(benchmark::State& state, ContainerInit containerInit) { auto values = containerInit(); std::vector indices; for (size_t i = 0; i < kLength; ++i) { values.push_back(i); indices.push_back(i); } // Make this repeatable. std::mt19937 rng(27); std::shuffle(indices.begin(), indices.end(), rng); int64_t sum; for (auto UNUSED_VAR : state) { sum = 0; for (auto i : indices) { sum += values[i]; } } checkIotaSum(sum); } void BM_std_random(benchmark::State& state) { randomImpl(state, []() { return std::vector(); }); } void BM_deque_random(benchmark::State& state) { randomImpl(state, []() { return std::deque(); }); } #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb_random(benchmark::State& state) { randomImpl(state, []() { return tbb::concurrent_vector(); }); } #endif // !BENCHMARK_WITHOUT_TBB void BM_dispenso_random(benchmark::State& state) { randomImpl(state, []() { return dispenso::ConcurrentVector(); }); } template void parallelImpl( benchmark::State& state, ContainerInit containerInit, ContainerPush containerPush) { for (auto UNUSED_VAR : state) { auto values = containerInit(); dispenso::parallel_for( 0, kLength, [&values, containerPush](size_t i) { containerPush(values, i); }); } } void BM_std_parallel(benchmark::State& state) { std::mutex mtx; parallelImpl( state, []() { return std::vector(); }, [&mtx](std::vector& c, int i) { std::lock_guard lk(mtx); c.push_back(i); }); } void BM_deque_parallel(benchmark::State& state) { std::mutex mtx; parallelImpl( state, []() { return std::deque(); }, [&mtx](std::deque& c, int i) { std::lock_guard lk(mtx); c.push_back(i); }); } #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb_parallel(benchmark::State& state) { parallelImpl( state, []() { return tbb::concurrent_vector(); }, [](tbb::concurrent_vector& c, int i) { c.push_back(i); }); } #endif // !BENCHMARK_WITHOUT_TBB void BM_dispenso_parallel(benchmark::State& state) { parallelImpl( state, []() { return dispenso::ConcurrentVector(); }, [](dispenso::ConcurrentVector& c, int i) { c.push_back(i); }); } void BM_std_parallel_reserve(benchmark::State& state) { std::mutex mtx; parallelImpl( state, []() { std::vector v; v.reserve(kLength); return v; }, [&mtx](std::vector& c, int i) { std::lock_guard lk(mtx); c.push_back(i); }); } #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb_parallel_reserve(benchmark::State& state) { parallelImpl( state, []() { tbb::concurrent_vector v; v.reserve(kLength); return v; }, [](tbb::concurrent_vector& c, int i) { c.push_back(i); }); } #endif // !BENCHMARK_WITHOUT_TBB void BM_dispenso_parallel_reserve(benchmark::State& state) { parallelImpl( state, []() { return dispenso::ConcurrentVector(kLength, dispenso::ReserveTag); }, [](dispenso::ConcurrentVector& c, int i) { c.push_back(i); }); } template void parallelImplClear( benchmark::State& state, ContainerInit containerInit, ContainerPush containerPush) { auto values = containerInit(); for (auto UNUSED_VAR : state) { values.clear(); dispenso::parallel_for( 0, kLength, [&values, containerPush](size_t i) { containerPush(values, i); }); } int64_t sum = 0; for (auto i : values) { sum += i; } checkIotaSum(sum); } void BM_std_parallel_clear(benchmark::State& state) { std::mutex mtx; parallelImplClear( state, []() { return std::vector(); }, [&mtx](std::vector& c, int i) { std::lock_guard lk(mtx); c.push_back(i); }); } void BM_deque_parallel_clear(benchmark::State& state) { std::mutex mtx; 
parallelImplClear( state, []() { return std::deque(); }, [&mtx](std::deque& c, int i) { std::lock_guard lk(mtx); c.push_back(i); }); } #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb_parallel_clear(benchmark::State& state) { parallelImplClear( state, []() { return tbb::concurrent_vector(); }, [](tbb::concurrent_vector& c, int i) { c.push_back(i); }); } #endif // !BENCHMARK_WITHOUT_TBB void BM_dispenso_parallel_clear(benchmark::State& state) { parallelImplClear( state, []() { return dispenso::ConcurrentVector(); }, [](dispenso::ConcurrentVector& c, int i) { c.push_back(i); }); } template void parallelImplGrowBy( size_t growBy, benchmark::State& state, ContainerInit containerInit, ContainerPush containerPush) { auto values = containerInit(); for (auto UNUSED_VAR : state) { values.clear(); dispenso::parallel_for( dispenso::makeChunkedRange(0, kLength, dispenso::ParForChunking::kStatic), [&values, containerPush, growBy](size_t i, size_t end) { while (i + growBy <= end) { containerPush(values, i, i + growBy); i += growBy; } containerPush(values, i, end); }); } int64_t sum = 0; for (auto i : values) { sum += i; } checkIotaSum(values, sum); } void BM_std_parallel_grow_by_10(benchmark::State& state) { std::mutex mtx; parallelImplGrowBy( 10, state, []() { return std::vector(); }, [&mtx](std::vector& c, int i, int end) { std::lock_guard lk(mtx); for (; i != end; ++i) { c.push_back(i); } }); } void BM_deque_parallel_grow_by_10(benchmark::State& state) { std::mutex mtx; parallelImplGrowBy( 10, state, []() { return std::deque(); }, [&mtx](std::deque& c, int i, int end) { std::lock_guard lk(mtx); for (; i != end; ++i) { c.push_back(i); } }); } #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb_parallel_grow_by_10(benchmark::State& state) { parallelImplGrowBy( 10, state, []() { return tbb::concurrent_vector(); }, [](tbb::concurrent_vector& c, int i, int end) { auto it = c.grow_by(end - i); for (; i != end; ++i, ++it) { *it = i; } }); } #endif // !BENCHMARK_WITHOUT_TBB void BM_dispenso_parallel_grow_by_10(benchmark::State& state) { parallelImplGrowBy( 10, state, []() { return dispenso::ConcurrentVector(); }, [](dispenso::ConcurrentVector& c, int i, int end) { c.grow_by_generator(end - i, [i]() mutable { return i++; }); }); } void BM_std_parallel_grow_by_100(benchmark::State& state) { std::mutex mtx; parallelImplGrowBy( 100, state, []() { return std::vector(); }, [&mtx](std::vector& c, int i, int end) { std::lock_guard lk(mtx); for (; i != end; ++i) { c.push_back(i); } }); } void BM_deque_parallel_grow_by_100(benchmark::State& state) { std::mutex mtx; parallelImplGrowBy( 100, state, []() { return std::deque(); }, [&mtx](std::deque& c, int i, int end) { std::lock_guard lk(mtx); for (; i != end; ++i) { c.push_back(i); } }); } #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb_parallel_grow_by_100(benchmark::State& state) { parallelImplGrowBy( 100, state, []() { return tbb::concurrent_vector(); }, [](tbb::concurrent_vector& c, int i, int end) { auto it = c.grow_by(end - i); for (; i != end; ++i, ++it) { *it = i; } }); } #endif // !BENCHMARK_WITHOUT_TBB void BM_dispenso_parallel_grow_by_100(benchmark::State& state) { parallelImplGrowBy( 100, state, []() { return dispenso::ConcurrentVector(); }, [](dispenso::ConcurrentVector& c, int i, int end) { c.grow_by_generator(end - i, [i]() mutable { return i++; }); }); } template void parallelImplGrowByMax( benchmark::State& state, ContainerInit containerInit, ContainerPush containerPush) { auto values = containerInit(); for (auto UNUSED_VAR : state) { values.clear(); 
dispenso::parallel_for( dispenso::makeChunkedRange(0, kLength, dispenso::ParForChunking::kStatic), [&values, containerPush](size_t i, size_t end) { containerPush(values, i, end); }); } int64_t sum = 0; for (auto i : values) { sum += i; } checkIotaSum(sum); } void BM_std_parallel_grow_by_max(benchmark::State& state) { std::mutex mtx; parallelImplGrowByMax( state, []() { return std::vector(); }, [&mtx](std::vector& c, int i, int end) { std::lock_guard lk(mtx); for (; i != end; ++i) { c.push_back(i); } }); } void BM_deque_parallel_grow_by_max(benchmark::State& state) { std::mutex mtx; parallelImplGrowByMax( state, []() { return std::deque(); }, [&mtx](std::deque& c, int i, int end) { std::lock_guard lk(mtx); for (; i != end; ++i) { c.push_back(i); } }); } #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb_parallel_grow_by_max(benchmark::State& state) { parallelImplGrowByMax( state, []() { return tbb::concurrent_vector(); }, [](tbb::concurrent_vector& c, int i, int end) { auto it = c.grow_by(end - i); for (; i != end; ++i, ++it) { *it = i; } }); } #endif // !BENCHMARK_WITHOUT_TBB void BM_dispenso_parallel_grow_by_max(benchmark::State& state) { parallelImplGrowByMax( state, []() { return dispenso::ConcurrentVector(); }, [](dispenso::ConcurrentVector& c, int i, int end) { c.grow_by_generator(end - i, [i]() mutable { return i++; }); }); } BENCHMARK(BM_std_push_back_serial); BENCHMARK(BM_deque_push_back_serial); #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb_push_back_serial); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso_push_back_serial); #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb_push_back_serial_grow_by_alternative); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso_push_back_serial_grow_by_alternative); BENCHMARK(BM_std_push_back_serial_reserve); #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb_push_back_serial_reserve); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso_push_back_serial_reserve); #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb_push_back_serial_grow_by_alternative_reserve); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso_push_back_serial_grow_by_alternative_reserve); BENCHMARK(BM_std_iterate); BENCHMARK(BM_deque_iterate); #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb_iterate); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso_iterate); BENCHMARK(BM_std_iterate_reverse); BENCHMARK(BM_deque_iterate_reverse); #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb_iterate_reverse); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso_iterate_reverse); BENCHMARK(BM_std_lower_bound); BENCHMARK(BM_deque_lower_bound); #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb_lower_bound); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso_lower_bound); BENCHMARK(BM_std_index); BENCHMARK(BM_deque_index); #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb_index); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso_index); BENCHMARK(BM_std_random); BENCHMARK(BM_deque_random); #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb_random); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso_random); BENCHMARK(BM_std_parallel); BENCHMARK(BM_deque_parallel); #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb_parallel); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso_parallel); BENCHMARK(BM_std_parallel_reserve); #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb_parallel_reserve); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso_parallel_reserve); BENCHMARK(BM_std_parallel_clear); 
BENCHMARK(BM_deque_parallel_clear); #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb_parallel_clear); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso_parallel_clear); BENCHMARK(BM_std_parallel_grow_by_10); BENCHMARK(BM_deque_parallel_grow_by_10); #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb_parallel_grow_by_10); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso_parallel_grow_by_10); BENCHMARK(BM_std_parallel_grow_by_100); BENCHMARK(BM_deque_parallel_grow_by_100); #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb_parallel_grow_by_100); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso_parallel_grow_by_100); BENCHMARK(BM_std_parallel_grow_by_max); BENCHMARK(BM_deque_parallel_grow_by_max); #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb_parallel_grow_by_max); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso_parallel_grow_by_max); BENCHMARK_MAIN(); ================================================ FILE: benchmarks/fast_math/CMakeLists.txt ================================================ # Copyright (c) Meta Platforms, Inc. and affiliates. # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. cmake_minimum_required(VERSION 3.12) # Apply SIMD ISA flags and Highway linkage from parent. set(FAST_MATH_EXTRA_FLAGS ${DISPENSO_FAST_MATH_SIMD_FLAGS}) set(FAST_MATH_EXTRA_LIBS "") if(DISPENSO_FAST_MATH_HIGHWAY AND TARGET hwy) list(APPEND FAST_MATH_EXTRA_LIBS hwy) # Treat Highway as a system include to suppress warnings from its headers. if(MSVC) list(APPEND FAST_MATH_EXTRA_FLAGS /external:I ${hwy_SOURCE_DIR}) else() list(APPEND FAST_MATH_EXTRA_FLAGS -isystem${hwy_SOURCE_DIR}) endif() endif() file(GLOB FAST_MATH_BENCHMARK_FILES CONFIGURE_DEPENDS *.cpp) foreach(BENCHMARK_FILE ${FAST_MATH_BENCHMARK_FILES}) set(BENCHMARK_NAME) get_filename_component(BENCHMARK_NAME ${BENCHMARK_FILE} NAME_WE) set(BENCHMARK_NAME "fast_math_${BENCHMARK_NAME}") add_executable(${BENCHMARK_NAME} ${BENCHMARK_FILE}) target_compile_features(${BENCHMARK_NAME} PRIVATE cxx_std_17) target_compile_options(${BENCHMARK_NAME} PRIVATE ${FAST_MATH_EXTRA_FLAGS}) target_link_libraries(${BENCHMARK_NAME} ${REQUIRED_LIBS} ${OPTIONAL_LIBS} ${FAST_MATH_EXTRA_LIBS}) endforeach() ================================================ FILE: benchmarks/fast_math/avx512_benchmarks.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
*/ #include "benchmark_helpers.h" #if defined(__AVX512F__) namespace dfm = dispenso::fast_math; namespace bench = dispenso::fast_math::bench; using Flt = __m512; // --- One-arg benchmarks --- void BM_sin_avx512(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::sin(x); }); } void BM_cos_avx512(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::cos(x); }); } void BM_tan_avx512(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::tan(x); }); } void BM_atan_avx512(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::atan(x); }); } void BM_acos_avx512(benchmark::State& state) { bench::runBench(state, bench::acosInputs(), [](auto x) { return dfm::acos(x); }); } void BM_asin_avx512(benchmark::State& state) { bench::runBench(state, bench::acosInputs(), [](auto x) { return dfm::asin(x); }); } void BM_exp_avx512(benchmark::State& state) { bench::runBench(state, bench::expInputs(), [](auto x) { return dfm::exp(x); }); } void BM_exp2_avx512(benchmark::State& state) { bench::runBench(state, bench::expInputs(), [](auto x) { return dfm::exp2(x); }); } void BM_exp10_avx512(benchmark::State& state) { bench::runBench(state, bench::expInputs(), [](auto x) { return dfm::exp10(x); }); } void BM_expm1_avx512(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::expm1(x); }); } void BM_log_avx512(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::log(x); }); } void BM_log2_avx512(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::log2(x); }); } void BM_log10_avx512(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::log10(x); }); } void BM_log1p_avx512(benchmark::State& state) { bench::runBench( state, bench::sinInputs(), [](Flt x) { return dfm::log1p(_mm512_abs_ps(x)); }); } void BM_cbrt_avx512(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::cbrt(x); }); } void BM_frexp_avx512(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { dfm::IntType_t e; return dfm::frexp(x, &e); }); } void BM_ldexp_avx512(benchmark::State& state) { bench::runBench( state, bench::logInputs(), [](auto x) { return dfm::ldexp(x, _mm512_set1_epi32(3)); }); } void BM_tanh_avx512(benchmark::State& state) { bench::runBench(state, bench::tanhInputs(), [](auto x) { return dfm::tanh(x); }); } void BM_erf_avx512(benchmark::State& state) { bench::runBench(state, bench::erfInputs(), [](auto x) { return dfm::erf(x); }); } // --- Two-arg benchmarks --- void BM_atan2_avx512(benchmark::State& state) { bench::runBench2(state, bench::expInputs(), bench::sinInputs(), [](auto y, auto x) { return dfm::atan2(y, x); }); } void BM_hypot_avx512(benchmark::State& state) { bench::runBench2(state, bench::hypotInputs(), bench::sinInputs(), [](auto x, auto y) { return dfm::hypot(x, y); }); } void BM_hypot_avx512_bounds(benchmark::State& state) { bench::runBench2(state, bench::hypotInputs(), bench::sinInputs(), [](auto x, auto y) { return dfm::hypot(x, y); }); } void BM_pow_avx512(benchmark::State& state) { bench::runBench2( state, bench::powBaseInputs(), bench::powExpInputs(), [](auto b, auto e) { return dfm::pow(b, e); }); } void BM_pow_avx512_accurate(benchmark::State& state) { bench::runBench2( state, bench::powBaseInputs(), bench::powExpInputs(), 
[](auto b, auto e) { return dfm::pow(b, e); }); } void BM_pow_avx512_scalar_exp(benchmark::State& state) { bench::runBench(state, bench::powBaseInputs(), [](auto x) { return dfm::pow(x, 2.5f); }); } // --- Libc-packed comparisons (AVX-512-specific, kept hand-written) --- void BM_hypot_libc_avx512(benchmark::State& state) { const auto& inputs = bench::hypotInputs(); const auto& inputs2 = bench::sinInputs(); size_t idx = 0; Flt sum = _mm512_setzero_ps(); for (auto _ : state) { (void)_; alignas(64) float x[16], y[16], r[16]; _mm512_store_ps(x, inputs[idx]); _mm512_store_ps(y, inputs2[idx]); for (int32_t i = 0; i < 16; ++i) { r[i] = ::hypotf(x[i], y[i]); } sum = _mm512_add_ps(sum, _mm512_load_ps(r)); idx = (idx + 1) & bench::kInputsMask; } state.SetItemsProcessed(state.iterations() * 16); bench::consumeResult(sum); } void BM_pow_libc_avx512(benchmark::State& state) { const auto& bases = bench::powBaseInputs(); const auto& exps = bench::powExpInputs(); size_t idx = 0; Flt sum = _mm512_setzero_ps(); for (auto _ : state) { (void)_; alignas(64) float x[16], y[16], r[16]; _mm512_store_ps(x, bases[idx]); _mm512_store_ps(y, exps[idx]); for (int32_t j = 0; j < 16; ++j) r[j] = ::powf(x[j], y[j]); sum = _mm512_add_ps(sum, _mm512_load_ps(r)); idx = (idx + 1) & bench::kInputsMask; } state.SetItemsProcessed(state.iterations() * 16); bench::consumeResult(sum); } // --- Registrations --- BENCHMARK(BM_sin_avx512); BENCHMARK(BM_cos_avx512); BENCHMARK(BM_tan_avx512); BENCHMARK(BM_atan_avx512); BENCHMARK(BM_acos_avx512); BENCHMARK(BM_asin_avx512); BENCHMARK(BM_exp_avx512); BENCHMARK(BM_exp2_avx512); BENCHMARK(BM_exp10_avx512); BENCHMARK(BM_expm1_avx512); BENCHMARK(BM_log_avx512); BENCHMARK(BM_log2_avx512); BENCHMARK(BM_log10_avx512); BENCHMARK(BM_log1p_avx512); BENCHMARK(BM_cbrt_avx512); BENCHMARK(BM_frexp_avx512); BENCHMARK(BM_ldexp_avx512); BENCHMARK(BM_tanh_avx512); BENCHMARK(BM_erf_avx512); BENCHMARK(BM_atan2_avx512); BENCHMARK(BM_hypot_avx512); BENCHMARK(BM_hypot_avx512_bounds); BENCHMARK(BM_hypot_libc_avx512); BENCHMARK(BM_pow_avx512); BENCHMARK(BM_pow_avx512_accurate); BENCHMARK(BM_pow_avx512_scalar_exp); BENCHMARK(BM_pow_libc_avx512); #else // !defined(__AVX512F__) int main() { std::cout << "AVX-512 not available, skipping benchmarks." << std::endl; return 0; } #endif // defined(__AVX512F__) ================================================ FILE: benchmarks/fast_math/avx_benchmarks.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
*/ #include "benchmark_helpers.h" #if defined(__AVX2__) namespace dfm = dispenso::fast_math; namespace bench = dispenso::fast_math::bench; using Flt = __m256; struct BoundsTraits { static constexpr bool kMaxAccuracy = false; static constexpr bool kBoundsValues = true; }; // --- One-arg benchmarks --- void BM_sin_avx(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::sin(x); }); } void BM_sin_avx_accurate(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::sin(x); }); } void BM_cos_avx(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::cos(x); }); } void BM_cos_avx_accurate(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::cos(x); }); } void BM_tan_avx(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::tan(x); }); } void BM_atan_avx(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::atan(x); }); } void BM_acos_avx(benchmark::State& state) { bench::runBench(state, bench::acosInputs(), [](auto x) { return dfm::acos(x); }); } void BM_asin_avx(benchmark::State& state) { bench::runBench(state, bench::acosInputs(), [](auto x) { return dfm::asin(x); }); } void BM_exp_avx(benchmark::State& state) { bench::runBench(state, bench::expInputs(), [](auto x) { return dfm::exp(x); }); } void BM_exp_avx_accurate(benchmark::State& state) { bench::runBench(state, bench::expInputs(), [](auto x) { return dfm::exp(x); }); } void BM_exp_avx_bounds(benchmark::State& state) { bench::runBench( state, bench::expInputs(), [](auto x) { return dfm::exp(x); }); } void BM_exp2_avx(benchmark::State& state) { bench::runBench(state, bench::expInputs(), [](auto x) { return dfm::exp2(x); }); } void BM_exp10_avx(benchmark::State& state) { bench::runBench(state, bench::expInputs(), [](auto x) { return dfm::exp10(x); }); } void BM_expm1_avx(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::expm1(x); }); } void BM_log_avx(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::log(x); }); } void BM_log_avx_accurate(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::log(x); }); } void BM_log2_avx(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::log2(x); }); } void BM_log10_avx(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::log10(x); }); } void BM_log1p_avx(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](Flt x) { Flt ax = _mm256_andnot_ps(_mm256_set1_ps(-0.0f), x); return dfm::log1p(ax); }); } void BM_cbrt_avx(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::cbrt(x); }); } void BM_cbrt_avx_accurate(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::cbrt(x); }); } void BM_frexp_avx(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { dfm::IntType_t e; return dfm::frexp(x, &e); }); } void BM_ldexp_avx(benchmark::State& state) { bench::runBench( state, bench::logInputs(), [](auto x) { return dfm::ldexp(x, _mm256_set1_epi32(3)); }); } void BM_tanh_avx(benchmark::State& state) { bench::runBench(state, bench::tanhInputs(), [](auto x) { return dfm::tanh(x); }); } void BM_erf_avx(benchmark::State& state) { 
bench::runBench(state, bench::erfInputs(), [](auto x) { return dfm::erf(x); }); } // --- Two-arg benchmarks --- void BM_atan2_avx(benchmark::State& state) { bench::runBench2(state, bench::expInputs(), bench::sinInputs(), [](auto y, auto x) { return dfm::atan2(y, x); }); } void BM_hypot_avx(benchmark::State& state) { bench::runBench2(state, bench::hypotInputs(), bench::sinInputs(), [](auto x, auto y) { return dfm::hypot(x, y); }); } void BM_hypot_avx_bounds(benchmark::State& state) { bench::runBench2(state, bench::hypotInputs(), bench::sinInputs(), [](auto x, auto y) { return dfm::hypot(x, y); }); } void BM_pow_avx(benchmark::State& state) { bench::runBench2( state, bench::powBaseInputs(), bench::powExpInputs(), [](auto b, auto e) { return dfm::pow(b, e); }); } void BM_pow_avx_accurate(benchmark::State& state) { bench::runBench2( state, bench::powBaseInputs(), bench::powExpInputs(), [](auto b, auto e) { return dfm::pow(b, e); }); } void BM_pow_avx_scalar_exp(benchmark::State& state) { bench::runBench(state, bench::powBaseInputs(), [](auto x) { return dfm::pow(x, 2.5f); }); } // --- Libc-packed comparison (AVX-specific, kept hand-written) --- void BM_pow_libc_avx(benchmark::State& state) { const auto& bases = bench::powBaseInputs(); const auto& exps = bench::powExpInputs(); size_t idx = 0; Flt sum = _mm256_setzero_ps(); for (auto _ : state) { (void)_; alignas(32) float x[8], y[8], r[8]; _mm256_store_ps(x, bases[idx]); _mm256_store_ps(y, exps[idx]); for (int32_t i = 0; i < 8; ++i) { r[i] = ::powf(x[i], y[i]); } sum = _mm256_add_ps(sum, _mm256_load_ps(r)); idx = (idx + 1) & bench::kInputsMask; } state.SetItemsProcessed(state.iterations() * 8); bench::consumeResult(sum); } // --- Registrations --- BENCHMARK(BM_sin_avx); BENCHMARK(BM_sin_avx_accurate); BENCHMARK(BM_cos_avx); BENCHMARK(BM_cos_avx_accurate); BENCHMARK(BM_tan_avx); BENCHMARK(BM_atan_avx); BENCHMARK(BM_acos_avx); BENCHMARK(BM_asin_avx); BENCHMARK(BM_exp_avx); BENCHMARK(BM_exp_avx_accurate); BENCHMARK(BM_exp_avx_bounds); BENCHMARK(BM_exp2_avx); BENCHMARK(BM_exp10_avx); BENCHMARK(BM_expm1_avx); BENCHMARK(BM_log_avx); BENCHMARK(BM_log_avx_accurate); BENCHMARK(BM_log2_avx); BENCHMARK(BM_log10_avx); BENCHMARK(BM_log1p_avx); BENCHMARK(BM_cbrt_avx); BENCHMARK(BM_cbrt_avx_accurate); BENCHMARK(BM_frexp_avx); BENCHMARK(BM_ldexp_avx); BENCHMARK(BM_tanh_avx); BENCHMARK(BM_erf_avx); BENCHMARK(BM_atan2_avx); BENCHMARK(BM_hypot_avx); BENCHMARK(BM_hypot_avx_bounds); BENCHMARK(BM_pow_avx); BENCHMARK(BM_pow_avx_accurate); BENCHMARK(BM_pow_avx_scalar_exp); BENCHMARK(BM_pow_libc_avx); #else // !defined(__AVX2__) int main() { std::cout << "AVX2 not available, skipping benchmarks." << std::endl; return 0; } #endif // defined(__AVX2__) ================================================ FILE: benchmarks/fast_math/benchmark_helpers.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #include #include #include #include #include #include #if __has_include("hwy/highway.h") #include "hwy/highway.h" #endif namespace dispenso { namespace fast_math { namespace bench { constexpr size_t kNumInputs = 4096; constexpr size_t kInputsMask = 4095; constexpr int32_t kMaxBenchLanes = 64; // --- Per-type primitives --- // // Each SIMD type needs: laneCount, loadVec, zeroVec, addVec, consumeResult. 
// These are template specializations so the compiler sees the exact operations // at each call site, ensuring zero overhead. template inline int32_t laneCount(); template inline Flt loadVec(const float* data); template inline Flt zeroVec(); template inline Flt addVec(Flt a, Flt b); template inline void consumeResult(Flt sum); // --- Scalar (float) --- template <> inline int32_t laneCount() { return 1; } template <> inline float loadVec(const float* data) { return *data; } template <> inline float zeroVec() { return 0.0f; } template <> inline float addVec(float a, float b) { return a + b; } template <> inline void consumeResult(float sum) { std::cout << sum << std::endl; } // --- SSE (__m128) --- #if defined(__SSE4_1__) template <> inline int32_t laneCount<__m128>() { return 4; } template <> inline __m128 loadVec<__m128>(const float* data) { return _mm_load_ps(data); } template <> inline __m128 zeroVec<__m128>() { return _mm_setzero_ps(); } template <> inline __m128 addVec<__m128>(__m128 a, __m128 b) { return _mm_add_ps(a, b); } template <> inline void consumeResult<__m128>(__m128 sum) { alignas(16) float buf[4]; _mm_store_ps(buf, sum); std::cout << buf[0] + buf[1] + buf[2] + buf[3] << std::endl; } #endif // __SSE4_1__ // --- AVX (__m256) --- #if defined(__AVX2__) template <> inline int32_t laneCount<__m256>() { return 8; } template <> inline __m256 loadVec<__m256>(const float* data) { return _mm256_load_ps(data); } template <> inline __m256 zeroVec<__m256>() { return _mm256_setzero_ps(); } template <> inline __m256 addVec<__m256>(__m256 a, __m256 b) { return _mm256_add_ps(a, b); } template <> inline void consumeResult<__m256>(__m256 sum) { alignas(32) float buf[8]; _mm256_store_ps(buf, sum); float total = 0.0f; for (int32_t i = 0; i < 8; ++i) total += buf[i]; std::cout << total << std::endl; } #endif // __AVX2__ // --- AVX-512 (__m512) --- #if defined(__AVX512F__) template <> inline int32_t laneCount<__m512>() { return 16; } template <> inline __m512 loadVec<__m512>(const float* data) { return _mm512_load_ps(data); } template <> inline __m512 zeroVec<__m512>() { return _mm512_setzero_ps(); } template <> inline __m512 addVec<__m512>(__m512 a, __m512 b) { return _mm512_add_ps(a, b); } template <> inline void consumeResult<__m512>(__m512 sum) { alignas(64) float buf[16]; _mm512_store_ps(buf, sum); float total = 0.0f; for (int32_t i = 0; i < 16; ++i) total += buf[i]; std::cout << total << std::endl; } #endif // __AVX512F__ // --- NEON (float32x4_t) --- #if defined(__aarch64__) template <> inline int32_t laneCount() { return 4; } template <> inline float32x4_t loadVec(const float* data) { return vld1q_f32(data); } template <> inline float32x4_t zeroVec() { return vdupq_n_f32(0.0f); } template <> inline float32x4_t addVec(float32x4_t a, float32x4_t b) { return vaddq_f32(a, b); } template <> inline void consumeResult(float32x4_t sum) { alignas(16) float buf[4]; vst1q_f32(buf, sum); std::cout << buf[0] + buf[1] + buf[2] + buf[3] << std::endl; } #endif // __aarch64__ // --- Highway (HwyFloat) --- #if __has_include("hwy/highway.h") namespace hn = hwy::HWY_NAMESPACE; template <> inline int32_t laneCount() { return static_cast(hn::Lanes(HwyFloatTag())); } template <> inline HwyFloat loadVec(const float* data) { return hn::LoadU(HwyFloatTag(), data); } template <> inline HwyFloat zeroVec() { return hn::Zero(HwyFloatTag()); } template <> inline HwyFloat addVec(HwyFloat a, HwyFloat b) { return hn::Add(a.v, b.v); } template <> inline void consumeResult(HwyFloat sum) { const HwyFloatTag d; constexpr 
size_t kMaxLanes = HWY_MAX_BYTES / sizeof(float); HWY_ALIGN float buf[kMaxLanes]; hn::StoreU(sum.v, d, buf); float total = 0.0f; const size_t N = hn::Lanes(d); for (size_t i = 0; i < N; ++i) { total += buf[i]; } std::cout << total << std::endl; } #endif // hwy/highway.h // --- Input generation --- // // makeInputs(lo, hi) generates kNumInputs vectors covering the scalar // range [lo, hi] at delta = (hi - lo) / kNumInputs spacing. Each SIMD vector // packs N consecutive values, so the total coverage is N * (hi - lo). template inline std::vector makeInputs(float lo, float hi) { const int32_t N = laneCount(); float delta = (hi - lo) / static_cast(kNumInputs); std::vector inputs; inputs.reserve(kNumInputs); alignas(64) float buf[kMaxBenchLanes]; float f = lo; for (size_t i = 0; i < kNumInputs; ++i) { for (int32_t j = 0; j < N; ++j) { buf[j] = f + static_cast(j) * delta; } inputs.push_back(loadVec(buf)); f += static_cast(N) * delta; } return inputs; } // --- Pre-defined input factories --- // // These match the ranges used across all SIMD benchmark files. template inline const std::vector& sinInputs() { static auto v = makeInputs(static_cast(-M_PI / 2.0), static_cast(M_PI / 2.0)); return v; } template inline const std::vector& expInputs() { static auto v = makeInputs(-10.0f, 10.0f); return v; } template inline const std::vector& logInputs() { static auto v = makeInputs(0.001f, 10000.0f); return v; } template inline const std::vector& acosInputs() { static auto v = makeInputs(-0.999f, 0.999f); return v; } template inline const std::vector& hypotInputs() { static auto v = makeInputs(-100000.0f, 100000.0f); return v; } template inline const std::vector& tanhInputs() { static auto v = makeInputs(-5.0f, 5.0f); return v; } template inline const std::vector& erfInputs() { static auto v = makeInputs(-4.0f, 4.0f); return v; } template inline const std::vector& powBaseInputs() { static auto v = makeInputs(0.01f, 100.0f); return v; } template inline const std::vector& powExpInputs() { static auto v = makeInputs(-8.0f, 8.0f); return v; } // --- Benchmark runners --- // // runBench: one-arg function benchmark. Func is a template parameter (lambda), // so the compiler sees the exact call target and inlines at -O2. This produces // the same assembly as hand-written code. // // runBench2: two-arg function benchmark (atan2, hypot, pow). template inline void runBench(benchmark::State& state, const std::vector& inputs, Func fn) { size_t idx = 0; Flt sum = zeroVec(); for (auto _ : state) { (void)_; sum = addVec(sum, fn(inputs[idx])); idx = (idx + 1) & kInputsMask; } state.SetItemsProcessed(state.iterations() * static_cast(laneCount())); consumeResult(sum); } template inline void runBench2( benchmark::State& state, const std::vector& xInputs, const std::vector& yInputs, Func fn) { size_t idx = 0; Flt sum = zeroVec(); for (auto _ : state) { (void)_; sum = addVec(sum, fn(xInputs[idx], yInputs[idx])); idx = (idx + 1) & kInputsMask; } state.SetItemsProcessed(state.iterations() * static_cast(laneCount())); consumeResult(sum); } } // namespace bench } // namespace fast_math } // namespace dispenso ================================================ FILE: benchmarks/fast_math/benchmarks.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
*/ #include "benchmark_helpers.h" namespace dfm = dispenso::fast_math; namespace bench = dispenso::fast_math::bench; constexpr size_t kNumInputs = 4096; constexpr size_t kInputsMask = 4095; struct BoundsTraits { static constexpr bool kMaxAccuracy = false; static constexpr bool kBoundsValues = true; }; // --- Input generators (scalar-specific ranges, different from SIMD) --- const std::vector& acosInputs() { static std::vector inputs = []() { float delta = 2.0f / kNumInputs; std::vector inp; for (float f = -1.0f; f <= 1.0f; f += delta) { inp.push_back(f); } while (inp.size() < kNumInputs) { inp.push_back(inp.back()); } return inp; }(); return inputs; } const std::vector& cbrtInputs() { static std::vector inputs = []() { float delta = 50000.0f / kNumInputs; std::vector inp; for (float f = -50000.f; f <= 50000.f; f += delta) { inp.push_back(f); } while (inp.size() < kNumInputs) { inp.push_back(inp.back()); } return inp; }(); return inputs; } const std::vector& sinInputs() { static std::vector inputs = []() { float delta = M_PI / kNumInputs; std::vector inp; for (float f = -M_PI / 2.0; f <= M_PI / 2.0; f += delta) { inp.push_back(f); } while (inp.size() < kNumInputs) { inp.push_back(inp.back()); } return inp; }(); return inputs; } const std::vector& logInputs() { static std::vector inputs = []() { float delta = 10000.0f / kNumInputs; std::vector inp; for (float f = 0.0f; f <= 10000.0f; f += delta) { inp.push_back(f); } while (inp.size() < kNumInputs) { inp.push_back(inp.back()); } return inp; }(); return inputs; } const std::vector& hypotInputs() { static std::vector inputs = []() { float delta = 200000.0f / kNumInputs; std::vector inp; for (float f = -100000.f; f <= 100000.f; f += delta) { inp.push_back(f); } while (inp.size() < kNumInputs) { inp.push_back(inp.back()); } return inp; }(); return inputs; } const std::vector& powBaseInputs() { static std::vector inputs = []() { float delta = 99.99f / kNumInputs; std::vector inp; for (float f = 0.01f; f <= 100.0f; f += delta) { inp.push_back(f); } while (inp.size() < kNumInputs) { inp.push_back(inp.back()); } return inp; }(); return inputs; } const std::vector& powExpInputs() { static std::vector inputs = []() { float delta = 16.0f / kNumInputs; std::vector inp; for (float f = -8.0f; f <= 8.0f; f += delta) { inp.push_back(f); } while (inp.size() < kNumInputs) { inp.push_back(inp.back()); } return inp; }(); return inputs; } const std::vector& tanhInputs() { static std::vector inputs = []() { float delta = 10.0f / kNumInputs; std::vector inp; for (float f = -5.0f; inp.size() < kNumInputs; f += delta) { inp.push_back(f); } return inp; }(); return inputs; } // --- Libc benchmarks --- void BM_acos(benchmark::State& state) { bench::runBench(state, acosInputs(), [](auto x) { return ::acosf(x); }); } void BM_asin(benchmark::State& state) { bench::runBench(state, acosInputs(), [](auto x) { return ::asinf(x); }); } void BM_atan(benchmark::State& state) { bench::runBench(state, cbrtInputs(), [](auto x) { return ::atanf(x); }); } void BM_cbrt(benchmark::State& state) { bench::runBench(state, cbrtInputs(), [](auto x) { return ::cbrtf(x); }); } void BM_sin(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { return ::sinf(x); }); } void BM_cos(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { return ::cosf(x); }); } void BM_tan(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { return ::tanf(x); }); } void BM_exp(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { 
return ::expf(x); }); } void BM_exp2(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { return ::exp2f(x); }); } void BM_exp10(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { return ::powf(10.0f, x); }); } void BM_log(benchmark::State& state) { bench::runBench(state, logInputs(), [](auto x) { return ::logf(x); }); } void BM_log2(benchmark::State& state) { bench::runBench(state, logInputs(), [](auto x) { return ::log2f(x); }); } void BM_log10(benchmark::State& state) { bench::runBench(state, logInputs(), [](auto x) { return ::log10f(x); }); } void BM_expm1(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { return ::expm1f(x); }); } void BM_log1p(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { return ::log1pf(std::fabs(x)); }); } void BM_tanh(benchmark::State& state) { bench::runBench(state, tanhInputs(), [](auto x) { return ::tanhf(x); }); } void BM_sin_plus_cos(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { return ::sinf(x) + ::cosf(x); }); } void BM_atan2(benchmark::State& state) { bench::runBench2(state, cbrtInputs(), sinInputs(), [](auto y, auto x) { return ::atan2f(y, x); }); } void BM_hypot(benchmark::State& state) { bench::runBench2( state, hypotInputs(), sinInputs(), [](auto x, auto y) { return ::hypotf(x, y); }); } void BM_pow(benchmark::State& state) { bench::runBench2( state, powBaseInputs(), powExpInputs(), [](auto b, auto e) { return ::powf(b, e); }); } // frexp and ldexp use non-standard loop patterns — kept hand-written. void BM_frexp(benchmark::State& state) { const auto& inputs = sinInputs(); size_t idx = 0; float sum = 0.0f; int exp; int64_t expSum = 0; for (auto _ : state) { (void)_; sum += ::frexpf(inputs[idx], &exp); expSum += exp; idx = (idx + 1) & kInputsMask; } state.SetItemsProcessed(state.iterations()); std::cout << sum << " " << expSum << std::endl; } void BM_ldexp(benchmark::State& state) { const auto& inputs = sinInputs(); size_t idx = 0; float sum = 0.0f; for (auto _ : state) { (void)_; sum += ::ldexpf(inputs[idx], idx & 7); idx = (idx + 1) & kInputsMask; } state.SetItemsProcessed(state.iterations()); std::cout << sum << std::endl; } // --- Dispenso fast_math benchmarks --- void BM_fastm_acos(benchmark::State& state) { bench::runBench(state, acosInputs(), [](auto x) { return dfm::acos(x); }); } void BM_fastm_asin(benchmark::State& state) { bench::runBench(state, acosInputs(), [](auto x) { return dfm::asin(x); }); } void BM_fastm_atan(benchmark::State& state) { bench::runBench(state, cbrtInputs(), [](auto x) { return dfm::atan(x); }); } void BM_fastm_cbrt(benchmark::State& state) { bench::runBench(state, cbrtInputs(), [](auto x) { return dfm::cbrt(x); }); } void BM_fastm_cbrt_accurate(benchmark::State& state) { bench::runBench( state, cbrtInputs(), [](auto x) { return dfm::cbrt(x); }); } void BM_fastm_sin(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { return dfm::sin(x); }); } void BM_fastm_sin_accurate(benchmark::State& state) { bench::runBench( state, sinInputs(), [](auto x) { return dfm::sin(x); }); } void BM_fastm_cos(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { return dfm::cos(x); }); } void BM_fastm_cos_accurate(benchmark::State& state) { bench::runBench( state, sinInputs(), [](auto x) { return dfm::cos(x); }); } void BM_fastm_tan(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { return dfm::tan(x); }); } void 
BM_fastm_tan_accurate(benchmark::State& state) { bench::runBench( state, sinInputs(), [](auto x) { return dfm::tan(x); }); } void BM_fastm_exp(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { return dfm::exp(x); }); } void BM_fastm_exp_bounds(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { return dfm::exp(x); }); } void BM_fastm_exp_accurate(benchmark::State& state) { bench::runBench( state, sinInputs(), [](auto x) { return dfm::exp(x); }); } void BM_fastm_exp2(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { return dfm::exp2(x); }); } void BM_fastm_exp2_accurate(benchmark::State& state) { bench::runBench( state, sinInputs(), [](auto x) { return dfm::exp2(x); }); } void BM_fastm_exp10(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { return dfm::exp10(x); }); } void BM_fastm_exp10_accurate(benchmark::State& state) { bench::runBench( state, sinInputs(), [](auto x) { return dfm::exp10(x); }); } void BM_fastm_log(benchmark::State& state) { bench::runBench(state, logInputs(), [](auto x) { return dfm::log(x); }); } void BM_fastm_log_accurate(benchmark::State& state) { bench::runBench( state, logInputs(), [](auto x) { return dfm::log(x); }); } void BM_fastm_log2(benchmark::State& state) { bench::runBench(state, logInputs(), [](auto x) { return dfm::log2(x); }); } void BM_fastm_log2_accurate(benchmark::State& state) { bench::runBench( state, logInputs(), [](auto x) { return dfm::log2(x); }); } void BM_fastm_log10(benchmark::State& state) { bench::runBench(state, logInputs(), [](auto x) { return dfm::log10(x); }); } void BM_fastm_log10_accurate(benchmark::State& state) { bench::runBench( state, logInputs(), [](auto x) { return dfm::log10(x); }); } void BM_fastm_expm1(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { return dfm::expm1(x); }); } void BM_fastm_log1p(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { return dfm::log1p(std::fabs(x)); }); } void BM_fastm_tanh(benchmark::State& state) { bench::runBench(state, tanhInputs(), [](auto x) { return dfm::tanh(x); }); } void BM_fastm_atan2(benchmark::State& state) { bench::runBench2( state, cbrtInputs(), sinInputs(), [](auto y, auto x) { return dfm::atan2(y, x); }); } void BM_fastm_atan2_bounds(benchmark::State& state) { bench::runBench2(state, cbrtInputs(), sinInputs(), [](auto y, auto x) { return dfm::atan2(y, x); }); } void BM_fastm_hypot(benchmark::State& state) { bench::runBench2( state, hypotInputs(), sinInputs(), [](auto x, auto y) { return dfm::hypot(x, y); }); } void BM_fastm_hypot_bounds(benchmark::State& state) { bench::runBench2(state, hypotInputs(), sinInputs(), [](auto x, auto y) { return dfm::hypot(x, y); }); } void BM_naive_hypot(benchmark::State& state) { bench::runBench2( state, hypotInputs(), sinInputs(), [](auto x, auto y) { return sqrtf(fmaf(x, x, y * y)); }); } void BM_fastm_pow(benchmark::State& state) { bench::runBench2( state, powBaseInputs(), powExpInputs(), [](auto b, auto e) { return dfm::pow(b, e); }); } void BM_fastm_pow_accurate(benchmark::State& state) { bench::runBench2(state, powBaseInputs(), powExpInputs(), [](auto b, auto e) { return dfm::pow(b, e); }); } // frexp and ldexp use non-standard loop patterns — kept hand-written. 
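// (Why they don't fit the shared runners: frexp returns a second result through an out-parameter
// — the exponent — so a single-accumulator runBench can't consume both outputs, and ldexp's
// integer argument varies per iteration (idx & 7), so it isn't a pure one-input function of the
// vector element. The loops below therefore accumulate the mantissa sum and the exponent sum by hand.)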
void BM_fastm_frexp(benchmark::State& state) { const auto& inputs = sinInputs(); size_t idx = 0; float sum = 0.0f; int exp; int64_t expSum = 0; for (auto _ : state) { (void)_; sum += dfm::frexp(inputs[idx], &exp); expSum += exp; idx = (idx + 1) & kInputsMask; } state.SetItemsProcessed(state.iterations()); std::cout << sum << " " << expSum << std::endl; } void BM_fastm_ldexp(benchmark::State& state) { const auto& inputs = sinInputs(); size_t idx = 0; float sum = 0.0f; for (auto _ : state) { (void)_; sum += dfm::ldexp(inputs[idx], idx & 7); idx = (idx + 1) & kInputsMask; } state.SetItemsProcessed(state.iterations()); std::cout << sum << std::endl; } // --- sincos / sincospi benchmarks --- void BM_fastm_sin_plus_cos(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { return dfm::sin(x) + dfm::cos(x); }); } void BM_fastm_sincos(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { float s, c; dfm::sincos(x, &s, &c); return s + c; }); } void BM_fastm_sinpi(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { return dfm::sinpi(x); }); } void BM_fastm_cospi(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { return dfm::cospi(x); }); } void BM_fastm_sincospi(benchmark::State& state) { bench::runBench(state, sinInputs(), [](auto x) { float s, c; dfm::sincospi(x, &s, &c); return s + c; }); } // --- Batch benchmarks: explicit SIMD via SseFloat (4-wide SSE) --- #if defined(__SSE4_1__) #include static void BM_batch_sinf(benchmark::State& state) { const auto& inputs = sinInputs(); alignas(16) float outputs[kNumInputs]; for (auto _ : state) { (void)_; for (size_t i = 0; i < kNumInputs; ++i) { outputs[i] = ::sinf(inputs[i]); } benchmark::DoNotOptimize(outputs); } state.SetItemsProcessed(state.iterations() * kNumInputs); } static void BM_batch_sin_scalar(benchmark::State& state) { const auto& inputs = sinInputs(); alignas(16) float outputs[kNumInputs]; for (auto _ : state) { (void)_; for (size_t i = 0; i < kNumInputs; ++i) { outputs[i] = dfm::sin(inputs[i]); } benchmark::DoNotOptimize(outputs); } state.SetItemsProcessed(state.iterations() * kNumInputs); } static void BM_batch_sin_sse(benchmark::State& state) { using namespace dispenso::fast_math; const auto& inputs = sinInputs(); alignas(16) float outputs[kNumInputs]; for (auto _ : state) { (void)_; for (size_t i = 0; i < kNumInputs; i += 4) { SseFloat x = _mm_loadu_ps(&inputs[i]); SseFloat r = sin(x); _mm_storeu_ps(&outputs[i], r.v); } benchmark::DoNotOptimize(outputs); } state.SetItemsProcessed(state.iterations() * kNumInputs); } static void BM_batch_cos_scalar(benchmark::State& state) { const auto& inputs = sinInputs(); alignas(16) float outputs[kNumInputs]; for (auto _ : state) { (void)_; for (size_t i = 0; i < kNumInputs; ++i) { outputs[i] = dfm::cos(inputs[i]); } benchmark::DoNotOptimize(outputs); } state.SetItemsProcessed(state.iterations() * kNumInputs); } static void BM_batch_cos_sse(benchmark::State& state) { using namespace dispenso::fast_math; const auto& inputs = sinInputs(); alignas(16) float outputs[kNumInputs]; for (auto _ : state) { (void)_; for (size_t i = 0; i < kNumInputs; i += 4) { SseFloat x = _mm_loadu_ps(&inputs[i]); SseFloat r = cos(x); _mm_storeu_ps(&outputs[i], r.v); } benchmark::DoNotOptimize(outputs); } state.SetItemsProcessed(state.iterations() * kNumInputs); } BENCHMARK(BM_batch_sinf); BENCHMARK(BM_batch_sin_scalar); BENCHMARK(BM_batch_sin_sse); BENCHMARK(BM_batch_cos_scalar); BENCHMARK(BM_batch_cos_sse); #endif // __SSE4_1__ // --- 
Registrations --- BENCHMARK(BM_acos); BENCHMARK(BM_fastm_acos); BENCHMARK(BM_asin); BENCHMARK(BM_fastm_asin); BENCHMARK(BM_atan); BENCHMARK(BM_fastm_atan); BENCHMARK(BM_atan2); BENCHMARK(BM_fastm_atan2); BENCHMARK(BM_fastm_atan2_bounds); BENCHMARK(BM_cbrt); BENCHMARK(BM_fastm_cbrt); BENCHMARK(BM_fastm_cbrt_accurate); BENCHMARK(BM_exp); BENCHMARK(BM_fastm_exp); BENCHMARK(BM_fastm_exp_bounds); BENCHMARK(BM_fastm_exp_accurate); BENCHMARK(BM_exp10); BENCHMARK(BM_fastm_exp10); BENCHMARK(BM_fastm_exp10_accurate); BENCHMARK(BM_exp2); BENCHMARK(BM_fastm_exp2); BENCHMARK(BM_fastm_exp2_accurate); BENCHMARK(BM_log); BENCHMARK(BM_fastm_log); BENCHMARK(BM_fastm_log_accurate); BENCHMARK(BM_log2); BENCHMARK(BM_fastm_log2); BENCHMARK(BM_fastm_log2_accurate); BENCHMARK(BM_log10); BENCHMARK(BM_fastm_log10); BENCHMARK(BM_fastm_log10_accurate); BENCHMARK(BM_sin); BENCHMARK(BM_fastm_sin); BENCHMARK(BM_fastm_sin_accurate); BENCHMARK(BM_cos); BENCHMARK(BM_fastm_cos); BENCHMARK(BM_fastm_cos_accurate); BENCHMARK(BM_frexp); BENCHMARK(BM_fastm_frexp); BENCHMARK(BM_ldexp); BENCHMARK(BM_fastm_ldexp); BENCHMARK(BM_tan); BENCHMARK(BM_fastm_tan); BENCHMARK(BM_fastm_tan_accurate); BENCHMARK(BM_hypot); BENCHMARK(BM_fastm_hypot); BENCHMARK(BM_naive_hypot); BENCHMARK(BM_fastm_hypot_bounds); BENCHMARK(BM_sin_plus_cos); BENCHMARK(BM_fastm_sin_plus_cos); BENCHMARK(BM_fastm_sincos); BENCHMARK(BM_fastm_sinpi); BENCHMARK(BM_fastm_cospi); BENCHMARK(BM_fastm_sincospi); BENCHMARK(BM_pow); BENCHMARK(BM_fastm_pow); BENCHMARK(BM_fastm_pow_accurate); BENCHMARK(BM_expm1); BENCHMARK(BM_fastm_expm1); BENCHMARK(BM_log1p); BENCHMARK(BM_fastm_log1p); BENCHMARK(BM_tanh); BENCHMARK(BM_fastm_tanh); ================================================ FILE: benchmarks/fast_math/erf_benchmarks.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ // Benchmark: erf S16 (float, t-substitution + inline exp) vs S21 (double, pure polynomial Estrin). // Also benchmarks libc erff for reference. 
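// Note on the two evaluation schemes compared below (illustrative sketch only; the helpers shown
// here are not part of the benchmarks): Horner's rule chains fused multiply-adds serially, so its
// latency grows with the polynomial degree, while Estrin's scheme pairs terms and combines the
// pairs with powers of x^2 in a balanced tree, shortening the dependency chain and letting
// independent FMAs overlap. For a cubic c0 + c1*x + c2*x^2 + c3*x^3 the two look like:
//
//   double horner3(double x, double c0, double c1, double c2, double c3) {
//     return std::fma(std::fma(std::fma(c3, x, c2), x, c1), x, c0);  // 3 dependent FMAs
//   }
//   double estrin3(double x, double c0, double c1, double c2, double c3) {
//     double p01 = std::fma(c1, x, c0);   // c0 + c1*x (independent of p23)
//     double p23 = std::fma(c3, x, c2);   // c2 + c3*x
//     return std::fma(p23, x * x, p01);   // combine the pairs with x^2
//   }
//
// The S21 variant below applies the same idea to 16 coefficients in a 4-level tree.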
#include #include #include #include #include #include #if defined(__SSE4_1__) #include #if defined(__GNUC__) || defined(__clang__) #define UNUSED_VAR myLocalForLoopVar __attribute__((unused)) #elif defined(_MSC_VER) #define UNUSED_VAR myLocalForLoopVar __pragma(warning(suppress : 4100)) #else #define UNUSED_VAR myLocalForLoopVar #endif namespace dfm = dispenso::fast_math; constexpr size_t kNumInputs = 4096; constexpr size_t kInputsMask = 4095; // --- Inputs: [-4, 4] --- const std::vector& erfScalarInputs() { static std::vector inputs = []() { float delta = 8.0f / kNumInputs; std::vector inp; for (float f = -4.0f; inp.size() < kNumInputs; f += delta) { inp.push_back(f); } return inp; }(); return inputs; } const std::vector<__m128>& erfSseInputs() { static std::vector<__m128> inputs = []() { float delta = 8.0f / kNumInputs; std::vector<__m128> inp; float f = -4.0f; for (size_t i = 0; i < kNumInputs; ++i) { inp.emplace_back(_mm_set_ps(f + 3 * delta, f + 2 * delta, f + delta, f)); f += 4 * delta; } return inp; }(); return inputs; } #if defined(__AVX2__) const std::vector<__m256>& erfAvxInputs() { static std::vector<__m256> inputs = []() { float delta = 8.0f / kNumInputs; std::vector<__m256> inp; float f = -4.0f; for (size_t i = 0; i < kNumInputs; ++i) { inp.emplace_back(_mm256_set_ps( f + 7 * delta, f + 6 * delta, f + 5 * delta, f + 4 * delta, f + 3 * delta, f + 2 * delta, f + delta, f)); f += 8 * delta; } return inp; }(); return inputs; } #endif // --- S16: float, t-substitution + inline exp, 2 ULP --- static inline float erf_s16(float x) { float ax = std::fabs(x); float result; if (ax >= 3.92f) { result = 1.0f; } else if (ax >= 0.875f) { constexpr float p = 0.45f; float t = 1.0f / std::fma(p, ax, 1.0f); constexpr float c0 = 0x1.04873ep-2f; constexpr float c1 = 0x1.f81fc6p-3f; constexpr float c2 = 0x1.189f42p-2f; constexpr float c3 = 0x1.15aaa6p-5f; constexpr float c4 = 0x1.65d24ep-2f; constexpr float c5 = -0x1.4432a4p-3f; float poly = t * std::fma(std::fma(std::fma(std::fma(std::fma(c5, t, c4), t, c3), t, c2), t, c1), t, c0); float u = ax * ax; constexpr float kLog2e = 0x1.715476p+0f; constexpr float kLn2hi = 0x1.62e400p-1f; constexpr float kLn2lo = 0x1.7f7d1cp-20f; float k = std::floor(u * kLog2e); float f = std::fma(k, -kLn2hi, u); f = std::fma(k, -kLn2lo, f); constexpr float e0 = 0x1.fffffep-1f, e1 = -0x1.ffff1ep-1f; constexpr float e2 = 0x1.ffe314p-2f, e3 = -0x1.53f876p-3f; constexpr float e4 = 0x1.462f16p-5f, e5 = -0x1.80e5b2p-8f; float exp_neg_f = std::fma(std::fma(std::fma(std::fma(std::fma(e5, f, e4), f, e3), f, e2), f, e1), f, e0); int32_t ki = static_cast(k); float pow2_neg_k = dfm::bit_cast((127 - ki) << 23); result = 1.0f - pow2_neg_k * exp_neg_f * poly; } else { constexpr float c0 = 0x1.20dd76p+0f; constexpr float q0 = -0x1.812746p-2f, q1 = 0x1.ce2ec6p-4f, q2 = -0x1.b81edep-6f; constexpr float q3 = 0x1.556b48p-8f, q4 = -0x1.b0255p-11f, q5 = 0x1.7149c8p-14f; float u = ax * ax; float q = std::fma(std::fma(std::fma(std::fma(std::fma(q5, u, q4), u, q3), u, q2), u, q1), u, q0); result = ax * std::fma(q, u, c0); } return x < 0.0f ? 
-result : result; } // --- S21: double, pure polynomial Estrin, 1 ULP --- static inline float erf_s21(float x) { double ax = std::fabs(static_cast(x)); double result; if (ax >= 3.92) { result = 1.0; } else { double u = ax * ax; constexpr double c0 = 0x1.20dd74ce6dac1p0; constexpr double c1 = -0x1.812728fedb0c3p-2; constexpr double c2 = 0x1.ce2c679f0f94dp-4; constexpr double c3 = -0x1.b81379b046993p-6; constexpr double c4 = 0x1.55decae500c6cp-8; constexpr double c5 = -0x1.bd402ca3b1d09p-11; constexpr double c6 = 0x1.edfbbbd68d00ep-14; constexpr double c7 = -0x1.d43f94bdfb90fp-17; constexpr double c8 = 0x1.77643daca82f5p-20; constexpr double c9 = -0x1.f276d5cf346ecp-24; constexpr double c10 = 0x1.0a42c17eedcadp-27; constexpr double c11 = -0x1.b999e591ae6bap-32; constexpr double c12 = 0x1.0f73303821975p-36; constexpr double c13 = -0x1.ce969a50741b3p-42; constexpr double c14 = 0x1.e56af7f1b38e4p-48; constexpr double c15 = -0x1.d6f65766c68e5p-55; // Estrin's scheme: 4-level tree double p0 = std::fma(c1, u, c0); double p1 = std::fma(c3, u, c2); double p2 = std::fma(c5, u, c4); double p3 = std::fma(c7, u, c6); double p4 = std::fma(c9, u, c8); double p5 = std::fma(c11, u, c10); double p6 = std::fma(c13, u, c12); double p7 = std::fma(c15, u, c14); double u2 = u * u; double q0 = std::fma(p1, u2, p0); double q1 = std::fma(p3, u2, p2); double q2 = std::fma(p5, u2, p4); double q3 = std::fma(p7, u2, p6); double u4 = u2 * u2; double r0 = std::fma(q1, u4, q0); double r1 = std::fma(q3, u4, q2); double u8 = u4 * u4; double R = std::fma(r1, u8, r0); result = ax * R; } return static_cast(x < 0.0f ? -result : result); } // --- SSE S16 (float4, t-substitution + inline exp) --- static inline __m128 erf_s16_sse(__m128 x) { __m128 ax = dfm::fabs(x); __m128 sign = _mm_and_ps(x, _mm_set1_ps(-0.0f)); // Near-zero path: erf(x) = x * (c0 + u * Q(u)) __m128 u = _mm_mul_ps(ax, ax); constexpr float nc0 = 0x1.20dd76p+0f; constexpr float nq0 = -0x1.812746p-2f, nq1 = 0x1.ce2ec6p-4f, nq2 = -0x1.b81edep-6f; constexpr float nq3 = 0x1.556b48p-8f, nq4 = -0x1.b0255p-11f, nq5 = 0x1.7149c8p-14f; __m128 q = _mm_fmadd_ps( _mm_fmadd_ps( _mm_fmadd_ps( _mm_fmadd_ps( _mm_fmadd_ps(_mm_set1_ps(nq5), u, _mm_set1_ps(nq4)), u, _mm_set1_ps(nq3)), u, _mm_set1_ps(nq2)), u, _mm_set1_ps(nq1)), u, _mm_set1_ps(nq0)); __m128 near_zero = _mm_mul_ps(ax, _mm_fmadd_ps(q, u, _mm_set1_ps(nc0))); // Erfc path: erf(x) = 1 - t*P(t)*exp(-x^2) constexpr float p = 0.45f; __m128 t = _mm_div_ps(_mm_set1_ps(1.0f), _mm_fmadd_ps(_mm_set1_ps(p), ax, _mm_set1_ps(1.0f))); constexpr float pc0 = 0x1.04873ep-2f, pc1 = 0x1.f81fc6p-3f, pc2 = 0x1.189f42p-2f; constexpr float pc3 = 0x1.15aaa6p-5f, pc4 = 0x1.65d24ep-2f, pc5 = -0x1.4432a4p-3f; __m128 ppoly = _mm_mul_ps( t, _mm_fmadd_ps( _mm_fmadd_ps( _mm_fmadd_ps( _mm_fmadd_ps( _mm_fmadd_ps(_mm_set1_ps(pc5), t, _mm_set1_ps(pc4)), t, _mm_set1_ps(pc3)), t, _mm_set1_ps(pc2)), t, _mm_set1_ps(pc1)), t, _mm_set1_ps(pc0))); // Inline exp(-x^2): exp(-u) = 2^(-k) * exp(-f) constexpr float kLog2e = 0x1.715476p+0f; constexpr float kLn2hi = 0x1.62e400p-1f; constexpr float kLn2lo = 0x1.7f7d1cp-20f; __m128 kv = _mm_floor_ps(_mm_mul_ps(u, _mm_set1_ps(kLog2e))); __m128 f = _mm_fmadd_ps(kv, _mm_set1_ps(-kLn2hi), u); f = _mm_fmadd_ps(kv, _mm_set1_ps(-kLn2lo), f); constexpr float e0 = 0x1.fffffep-1f, e1 = -0x1.ffff1ep-1f; constexpr float e2 = 0x1.ffe314p-2f, e3 = -0x1.53f876p-3f; constexpr float e4 = 0x1.462f16p-5f, e5 = -0x1.80e5b2p-8f; __m128 exp_neg_f = _mm_fmadd_ps( _mm_fmadd_ps( _mm_fmadd_ps( _mm_fmadd_ps(_mm_fmadd_ps(_mm_set1_ps(e5), f, 
_mm_set1_ps(e4)), f, _mm_set1_ps(e3)), f, _mm_set1_ps(e2)), f, _mm_set1_ps(e1)), f, _mm_set1_ps(e0)); __m128i ki = _mm_cvtps_epi32(kv); __m128 pow2_neg_k = _mm_castsi128_ps(_mm_slli_epi32(_mm_sub_epi32(_mm_set1_epi32(127), ki), 23)); __m128 erfc_result = _mm_sub_ps(_mm_set1_ps(1.0f), _mm_mul_ps(_mm_mul_ps(pow2_neg_k, exp_neg_f), ppoly)); // Blend: use near_zero for |x| < 0.875, erfc for |x| >= 0.875 __m128 use_erfc = _mm_cmpge_ps(ax, _mm_set1_ps(0.875f)); __m128 result = _mm_blendv_ps(near_zero, erfc_result, use_erfc); // Clamp to 1 for |x| >= 3.92 __m128 saturated = _mm_cmpge_ps(ax, _mm_set1_ps(3.92f)); result = _mm_blendv_ps(result, _mm_set1_ps(1.0f), saturated); // Restore sign return _mm_or_ps(result, sign); } // --- SSE S21 (double poly Estrin, process 4 floats via 2x double2) --- static inline __m128 erf_s21_sse(__m128 x) { __m128 ax_f = dfm::fabs(x); __m128 sign = _mm_and_ps(x, _mm_set1_ps(-0.0f)); // Convert to double: split into low 2 and high 2 __m128d ax_lo = _mm_cvtps_pd(ax_f); __m128d ax_hi = _mm_cvtps_pd(_mm_movehl_ps(ax_f, ax_f)); auto eval_poly = [](__m128d ax) -> __m128d { __m128d u = _mm_mul_pd(ax, ax); constexpr double c0 = 0x1.20dd74ce6dac1p0; constexpr double c1 = -0x1.812728fedb0c3p-2; constexpr double c2 = 0x1.ce2c679f0f94dp-4; constexpr double c3 = -0x1.b81379b046993p-6; constexpr double c4 = 0x1.55decae500c6cp-8; constexpr double c5 = -0x1.bd402ca3b1d09p-11; constexpr double c6 = 0x1.edfbbbd68d00ep-14; constexpr double c7 = -0x1.d43f94bdfb90fp-17; constexpr double c8 = 0x1.77643daca82f5p-20; constexpr double c9 = -0x1.f276d5cf346ecp-24; constexpr double c10 = 0x1.0a42c17eedcadp-27; constexpr double c11 = -0x1.b999e591ae6bap-32; constexpr double c12 = 0x1.0f73303821975p-36; constexpr double c13 = -0x1.ce969a50741b3p-42; constexpr double c14 = 0x1.e56af7f1b38e4p-48; constexpr double c15 = -0x1.d6f65766c68e5p-55; // Estrin level 0 __m128d p0 = _mm_fmadd_pd(_mm_set1_pd(c1), u, _mm_set1_pd(c0)); __m128d p1 = _mm_fmadd_pd(_mm_set1_pd(c3), u, _mm_set1_pd(c2)); __m128d p2 = _mm_fmadd_pd(_mm_set1_pd(c5), u, _mm_set1_pd(c4)); __m128d p3 = _mm_fmadd_pd(_mm_set1_pd(c7), u, _mm_set1_pd(c6)); __m128d p4 = _mm_fmadd_pd(_mm_set1_pd(c9), u, _mm_set1_pd(c8)); __m128d p5 = _mm_fmadd_pd(_mm_set1_pd(c11), u, _mm_set1_pd(c10)); __m128d p6 = _mm_fmadd_pd(_mm_set1_pd(c13), u, _mm_set1_pd(c12)); __m128d p7 = _mm_fmadd_pd(_mm_set1_pd(c15), u, _mm_set1_pd(c14)); __m128d u2 = _mm_mul_pd(u, u); // Level 1 __m128d q0 = _mm_fmadd_pd(p1, u2, p0); __m128d q1 = _mm_fmadd_pd(p3, u2, p2); __m128d q2 = _mm_fmadd_pd(p5, u2, p4); __m128d q3 = _mm_fmadd_pd(p7, u2, p6); __m128d u4 = _mm_mul_pd(u2, u2); // Level 2 __m128d r0 = _mm_fmadd_pd(q1, u4, q0); __m128d r1 = _mm_fmadd_pd(q3, u4, q2); __m128d u8 = _mm_mul_pd(u4, u4); // Level 3 __m128d R = _mm_fmadd_pd(r1, u8, r0); return _mm_mul_pd(ax, R); }; __m128d res_lo = eval_poly(ax_lo); __m128d res_hi = eval_poly(ax_hi); // Convert back to float __m128 result = _mm_movelh_ps(_mm_cvtpd_ps(res_lo), _mm_cvtpd_ps(res_hi)); // Clamp to 1 for |x| >= 3.92 __m128 saturated = _mm_cmpge_ps(ax_f, _mm_set1_ps(3.92f)); result = _mm_blendv_ps(result, _mm_set1_ps(1.0f), saturated); // Restore sign return _mm_or_ps(result, sign); } static void consumeSum(__m128 sum) { alignas(16) float buf[4]; _mm_store_ps(buf, sum); std::cout << buf[0] + buf[1] + buf[2] + buf[3] << std::endl; } // --- Scalar benchmarks --- void BM_erf_libc(benchmark::State& state) { const auto& inputs = erfScalarInputs(); size_t idx = 0; float sum = 0.0f; for (auto UNUSED_VAR : state) { sum += 
::erff(inputs[idx]); idx = (idx + 1) & kInputsMask; } state.SetItemsProcessed(state.iterations()); std::cout << sum << std::endl; } void BM_erf_s16_scalar(benchmark::State& state) { const auto& inputs = erfScalarInputs(); size_t idx = 0; float sum = 0.0f; for (auto UNUSED_VAR : state) { sum += erf_s16(inputs[idx]); idx = (idx + 1) & kInputsMask; } state.SetItemsProcessed(state.iterations()); std::cout << sum << std::endl; } void BM_erf_s21_scalar(benchmark::State& state) { const auto& inputs = erfScalarInputs(); size_t idx = 0; float sum = 0.0f; for (auto UNUSED_VAR : state) { sum += erf_s21(inputs[idx]); idx = (idx + 1) & kInputsMask; } state.SetItemsProcessed(state.iterations()); std::cout << sum << std::endl; } // --- SSE benchmarks --- void BM_erf_s16_sse(benchmark::State& state) { const auto& inputs = erfSseInputs(); size_t idx = 0; __m128 sum = _mm_setzero_ps(); for (auto UNUSED_VAR : state) { sum = _mm_add_ps(sum, erf_s16_sse(inputs[idx])); idx = (idx + 1) & kInputsMask; } state.SetItemsProcessed(state.iterations() * 4); consumeSum(sum); } void BM_erf_s21_sse(benchmark::State& state) { const auto& inputs = erfSseInputs(); size_t idx = 0; __m128 sum = _mm_setzero_ps(); for (auto UNUSED_VAR : state) { sum = _mm_add_ps(sum, erf_s21_sse(inputs[idx])); idx = (idx + 1) & kInputsMask; } state.SetItemsProcessed(state.iterations() * 4); consumeSum(sum); } BENCHMARK(BM_erf_libc); BENCHMARK(BM_erf_s16_scalar); BENCHMARK(BM_erf_s21_scalar); BENCHMARK(BM_erf_s16_sse); BENCHMARK(BM_erf_s21_sse); #if defined(__AVX2__) static void consumeSum256(__m256 sum) { alignas(32) float buf[8]; _mm256_store_ps(buf, sum); float s = 0; for (int i = 0; i < 8; ++i) s += buf[i]; std::cout << s << std::endl; } // --- AVX S16 --- static inline __m256 erf_s16_avx(__m256 x) { __m256 ax = dfm::fabs(x); __m256 sign = _mm256_and_ps(x, _mm256_set1_ps(-0.0f)); // Near-zero path __m256 u = _mm256_mul_ps(ax, ax); constexpr float nc0 = 0x1.20dd76p+0f; constexpr float nq0 = -0x1.812746p-2f, nq1 = 0x1.ce2ec6p-4f, nq2 = -0x1.b81edep-6f; constexpr float nq3 = 0x1.556b48p-8f, nq4 = -0x1.b0255p-11f, nq5 = 0x1.7149c8p-14f; __m256 q = _mm256_fmadd_ps( _mm256_fmadd_ps( _mm256_fmadd_ps( _mm256_fmadd_ps( _mm256_fmadd_ps(_mm256_set1_ps(nq5), u, _mm256_set1_ps(nq4)), u, _mm256_set1_ps(nq3)), u, _mm256_set1_ps(nq2)), u, _mm256_set1_ps(nq1)), u, _mm256_set1_ps(nq0)); __m256 near_zero = _mm256_mul_ps(ax, _mm256_fmadd_ps(q, u, _mm256_set1_ps(nc0))); // Erfc path constexpr float p = 0.45f; __m256 t = _mm256_div_ps( _mm256_set1_ps(1.0f), _mm256_fmadd_ps(_mm256_set1_ps(p), ax, _mm256_set1_ps(1.0f))); constexpr float pc0 = 0x1.04873ep-2f, pc1 = 0x1.f81fc6p-3f, pc2 = 0x1.189f42p-2f; constexpr float pc3 = 0x1.15aaa6p-5f, pc4 = 0x1.65d24ep-2f, pc5 = -0x1.4432a4p-3f; __m256 ppoly = _mm256_mul_ps( t, _mm256_fmadd_ps( _mm256_fmadd_ps( _mm256_fmadd_ps( _mm256_fmadd_ps( _mm256_fmadd_ps(_mm256_set1_ps(pc5), t, _mm256_set1_ps(pc4)), t, _mm256_set1_ps(pc3)), t, _mm256_set1_ps(pc2)), t, _mm256_set1_ps(pc1)), t, _mm256_set1_ps(pc0))); // Inline exp(-x^2) constexpr float kLog2e = 0x1.715476p+0f; constexpr float kLn2hi = 0x1.62e400p-1f; constexpr float kLn2lo = 0x1.7f7d1cp-20f; __m256 kv = _mm256_floor_ps(_mm256_mul_ps(u, _mm256_set1_ps(kLog2e))); __m256 f = _mm256_fmadd_ps(kv, _mm256_set1_ps(-kLn2hi), u); f = _mm256_fmadd_ps(kv, _mm256_set1_ps(-kLn2lo), f); constexpr float e0 = 0x1.fffffep-1f, e1 = -0x1.ffff1ep-1f; constexpr float e2 = 0x1.ffe314p-2f, e3 = -0x1.53f876p-3f; constexpr float e4 = 0x1.462f16p-5f, e5 = -0x1.80e5b2p-8f; __m256 exp_neg_f = 
_mm256_fmadd_ps( _mm256_fmadd_ps( _mm256_fmadd_ps( _mm256_fmadd_ps( _mm256_fmadd_ps(_mm256_set1_ps(e5), f, _mm256_set1_ps(e4)), f, _mm256_set1_ps(e3)), f, _mm256_set1_ps(e2)), f, _mm256_set1_ps(e1)), f, _mm256_set1_ps(e0)); __m256i ki = _mm256_cvtps_epi32(kv); __m256 pow2_neg_k = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_sub_epi32(_mm256_set1_epi32(127), ki), 23)); __m256 erfc_result = _mm256_sub_ps( _mm256_set1_ps(1.0f), _mm256_mul_ps(_mm256_mul_ps(pow2_neg_k, exp_neg_f), ppoly)); // Blend __m256 use_erfc = _mm256_cmp_ps(ax, _mm256_set1_ps(0.875f), _CMP_GE_OQ); __m256 result = _mm256_blendv_ps(near_zero, erfc_result, use_erfc); __m256 saturated = _mm256_cmp_ps(ax, _mm256_set1_ps(3.92f), _CMP_GE_OQ); result = _mm256_blendv_ps(result, _mm256_set1_ps(1.0f), saturated); return _mm256_or_ps(result, sign); } // --- AVX S21 (process 8 floats via 4x double2) --- static inline __m256 erf_s21_avx(__m256 x) { __m256 ax_f = dfm::fabs(x); __m256 sign = _mm256_and_ps(x, _mm256_set1_ps(-0.0f)); // Split 8 floats into 4 groups of 2 doubles __m128 lo4 = _mm256_castps256_ps128(ax_f); __m128 hi4 = _mm256_extractf128_ps(ax_f, 1); __m128d ax_0 = _mm_cvtps_pd(lo4); __m128d ax_1 = _mm_cvtps_pd(_mm_movehl_ps(lo4, lo4)); __m128d ax_2 = _mm_cvtps_pd(hi4); __m128d ax_3 = _mm_cvtps_pd(_mm_movehl_ps(hi4, hi4)); auto eval_poly = [](__m128d ax) -> __m128d { __m128d u = _mm_mul_pd(ax, ax); constexpr double c0 = 0x1.20dd74ce6dac1p0; constexpr double c1 = -0x1.812728fedb0c3p-2; constexpr double c2 = 0x1.ce2c679f0f94dp-4; constexpr double c3 = -0x1.b81379b046993p-6; constexpr double c4 = 0x1.55decae500c6cp-8; constexpr double c5 = -0x1.bd402ca3b1d09p-11; constexpr double c6 = 0x1.edfbbbd68d00ep-14; constexpr double c7 = -0x1.d43f94bdfb90fp-17; constexpr double c8 = 0x1.77643daca82f5p-20; constexpr double c9 = -0x1.f276d5cf346ecp-24; constexpr double c10 = 0x1.0a42c17eedcadp-27; constexpr double c11 = -0x1.b999e591ae6bap-32; constexpr double c12 = 0x1.0f73303821975p-36; constexpr double c13 = -0x1.ce969a50741b3p-42; constexpr double c14 = 0x1.e56af7f1b38e4p-48; constexpr double c15 = -0x1.d6f65766c68e5p-55; __m128d p0 = _mm_fmadd_pd(_mm_set1_pd(c1), u, _mm_set1_pd(c0)); __m128d p1 = _mm_fmadd_pd(_mm_set1_pd(c3), u, _mm_set1_pd(c2)); __m128d p2 = _mm_fmadd_pd(_mm_set1_pd(c5), u, _mm_set1_pd(c4)); __m128d p3 = _mm_fmadd_pd(_mm_set1_pd(c7), u, _mm_set1_pd(c6)); __m128d p4 = _mm_fmadd_pd(_mm_set1_pd(c9), u, _mm_set1_pd(c8)); __m128d p5 = _mm_fmadd_pd(_mm_set1_pd(c11), u, _mm_set1_pd(c10)); __m128d p6 = _mm_fmadd_pd(_mm_set1_pd(c13), u, _mm_set1_pd(c12)); __m128d p7 = _mm_fmadd_pd(_mm_set1_pd(c15), u, _mm_set1_pd(c14)); __m128d u2 = _mm_mul_pd(u, u); __m128d q0 = _mm_fmadd_pd(p1, u2, p0); __m128d q1 = _mm_fmadd_pd(p3, u2, p2); __m128d q2 = _mm_fmadd_pd(p5, u2, p4); __m128d q3 = _mm_fmadd_pd(p7, u2, p6); __m128d u4 = _mm_mul_pd(u2, u2); __m128d r0 = _mm_fmadd_pd(q1, u4, q0); __m128d r1 = _mm_fmadd_pd(q3, u4, q2); __m128d u8 = _mm_mul_pd(u4, u4); __m128d R = _mm_fmadd_pd(r1, u8, r0); return _mm_mul_pd(ax, R); }; __m128d res_0 = eval_poly(ax_0); __m128d res_1 = eval_poly(ax_1); __m128d res_2 = eval_poly(ax_2); __m128d res_3 = eval_poly(ax_3); // Convert back: 4x double2 -> 2x float4 -> 1x float8 __m128 lo_f = _mm_movelh_ps(_mm_cvtpd_ps(res_0), _mm_cvtpd_ps(res_1)); __m128 hi_f = _mm_movelh_ps(_mm_cvtpd_ps(res_2), _mm_cvtpd_ps(res_3)); __m256 result = _mm256_set_m128(hi_f, lo_f); // Clamp __m256 saturated = _mm256_cmp_ps(ax_f, _mm256_set1_ps(3.92f), _CMP_GE_OQ); result = _mm256_blendv_ps(result, _mm256_set1_ps(1.0f), saturated); 
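// erf is odd: 'sign' holds only the sign bit of x (x & -0.0f), and the result computed from |x|
// is non-negative, so OR-ing the sign bit back in yields -erf(|x|) for negative inputs.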
return _mm256_or_ps(result, sign); } void BM_erf_s16_avx(benchmark::State& state) { const auto& inputs = erfAvxInputs(); size_t idx = 0; __m256 sum = _mm256_setzero_ps(); for (auto UNUSED_VAR : state) { sum = _mm256_add_ps(sum, erf_s16_avx(inputs[idx])); idx = (idx + 1) & kInputsMask; } state.SetItemsProcessed(state.iterations() * 8); consumeSum256(sum); } void BM_erf_s21_avx(benchmark::State& state) { const auto& inputs = erfAvxInputs(); size_t idx = 0; __m256 sum = _mm256_setzero_ps(); for (auto UNUSED_VAR : state) { sum = _mm256_add_ps(sum, erf_s21_avx(inputs[idx])); idx = (idx + 1) & kInputsMask; } state.SetItemsProcessed(state.iterations() * 8); consumeSum256(sum); } BENCHMARK(BM_erf_s16_avx); BENCHMARK(BM_erf_s21_avx); #endif // __AVX2__ #else // !__SSE4_1__ int main() { std::cout << "SSE4.1 not available, skipping benchmarks." << std::endl; return 0; } #endif // __SSE4_1__ ================================================ FILE: benchmarks/fast_math/hwy_benchmarks.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include "benchmark_helpers.h" #if __has_include("hwy/highway.h") #include "hwy/contrib/math/math-inl.h" namespace dfm = dispenso::fast_math; namespace bench = dispenso::fast_math::bench; namespace hn = hwy::HWY_NAMESPACE; using Flt = dfm::HwyFloat; using HwyFloatTag = dfm::HwyFloatTag; struct BoundsTraits { static constexpr bool kMaxAccuracy = false; static constexpr bool kBoundsValues = true; }; // --- One-arg benchmarks --- void BM_sin_hwy(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::sin(x); }); } void BM_sin_hwy_accurate(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::sin(x); }); } void BM_cos_hwy(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::cos(x); }); } void BM_cos_hwy_accurate(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::cos(x); }); } void BM_tan_hwy(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::tan(x); }); } void BM_atan_hwy(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::atan(x); }); } void BM_acos_hwy(benchmark::State& state) { bench::runBench(state, bench::acosInputs(), [](auto x) { return dfm::acos(x); }); } void BM_asin_hwy(benchmark::State& state) { bench::runBench(state, bench::acosInputs(), [](auto x) { return dfm::asin(x); }); } void BM_exp_hwy(benchmark::State& state) { bench::runBench(state, bench::expInputs(), [](auto x) { return dfm::exp(x); }); } void BM_exp_hwy_accurate(benchmark::State& state) { bench::runBench(state, bench::expInputs(), [](auto x) { return dfm::exp(x); }); } void BM_exp_hwy_bounds(benchmark::State& state) { bench::runBench( state, bench::expInputs(), [](auto x) { return dfm::exp(x); }); } void BM_exp2_hwy(benchmark::State& state) { bench::runBench(state, bench::expInputs(), [](auto x) { return dfm::exp2(x); }); } void BM_exp10_hwy(benchmark::State& state) { bench::runBench(state, bench::expInputs(), [](auto x) { return dfm::exp10(x); }); } void BM_expm1_hwy(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::expm1(x); }); } void BM_log_hwy(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { 
return dfm::log(x); }); } void BM_log_hwy_accurate(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::log(x); }); } void BM_log2_hwy(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::log2(x); }); } void BM_log10_hwy(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::log10(x); }); } void BM_log1p_hwy(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::log1p(hn::Abs(x.v)); }); } void BM_cbrt_hwy(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::cbrt(x); }); } void BM_cbrt_hwy_accurate(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::cbrt(x); }); } void BM_frexp_hwy(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { dfm::IntType_t e; return dfm::frexp(x, &e); }); } void BM_ldexp_hwy(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { const hn::RebindToSigned di; return dfm::ldexp(x, hn::Set(di, 3)); }); } void BM_tanh_hwy(benchmark::State& state) { bench::runBench(state, bench::tanhInputs(), [](auto x) { return dfm::tanh(x); }); } void BM_erf_hwy(benchmark::State& state) { bench::runBench(state, bench::erfInputs(), [](auto x) { return dfm::erf(x); }); } // --- Two-arg benchmarks --- void BM_atan2_hwy(benchmark::State& state) { bench::runBench2(state, bench::expInputs(), bench::sinInputs(), [](auto y, auto x) { return dfm::atan2(y, x); }); } void BM_hypot_hwy(benchmark::State& state) { bench::runBench2(state, bench::hypotInputs(), bench::sinInputs(), [](auto x, auto y) { return dfm::hypot(x, y); }); } void BM_hypot_hwy_bounds(benchmark::State& state) { bench::runBench2(state, bench::hypotInputs(), bench::sinInputs(), [](auto x, auto y) { return dfm::hypot(x, y); }); } // --- Highway contrib/math comparison benchmarks --- void BM_sin_hwy_contrib(benchmark::State& state) { bench::runBench( state, bench::sinInputs(), [](auto x) { return hn::Sin(HwyFloatTag(), x.v); }); } void BM_cos_hwy_contrib(benchmark::State& state) { bench::runBench( state, bench::sinInputs(), [](auto x) { return hn::Cos(HwyFloatTag(), x.v); }); } void BM_exp_hwy_contrib(benchmark::State& state) { bench::runBench( state, bench::expInputs(), [](auto x) { return hn::Exp(HwyFloatTag(), x.v); }); } void BM_exp2_hwy_contrib(benchmark::State& state) { bench::runBench( state, bench::expInputs(), [](auto x) { return hn::Exp2(HwyFloatTag(), x.v); }); } void BM_log_hwy_contrib(benchmark::State& state) { bench::runBench( state, bench::logInputs(), [](auto x) { return hn::Log(HwyFloatTag(), x.v); }); } void BM_log2_hwy_contrib(benchmark::State& state) { bench::runBench( state, bench::logInputs(), [](auto x) { return hn::Log2(HwyFloatTag(), x.v); }); } void BM_log10_hwy_contrib(benchmark::State& state) { bench::runBench( state, bench::logInputs(), [](auto x) { return hn::Log10(HwyFloatTag(), x.v); }); } void BM_atan_hwy_contrib(benchmark::State& state) { bench::runBench( state, bench::sinInputs(), [](auto x) { return hn::Atan(HwyFloatTag(), x.v); }); } void BM_acos_hwy_contrib(benchmark::State& state) { bench::runBench( state, bench::acosInputs(), [](auto x) { return hn::Acos(HwyFloatTag(), x.v); }); } void BM_asin_hwy_contrib(benchmark::State& state) { bench::runBench( state, bench::acosInputs(), [](auto x) { return hn::Asin(HwyFloatTag(), x.v); }); } void BM_atan2_hwy_contrib(benchmark::State& state) { 
bench::runBench2(state, bench::expInputs(), bench::sinInputs(), [](auto y, auto x) { return hn::Atan2(HwyFloatTag(), y.v, x.v); }); } // --- Registrations --- BENCHMARK(BM_sin_hwy); BENCHMARK(BM_sin_hwy_accurate); BENCHMARK(BM_sin_hwy_contrib); BENCHMARK(BM_cos_hwy); BENCHMARK(BM_cos_hwy_accurate); BENCHMARK(BM_cos_hwy_contrib); BENCHMARK(BM_tan_hwy); BENCHMARK(BM_atan_hwy); BENCHMARK(BM_atan_hwy_contrib); BENCHMARK(BM_acos_hwy); BENCHMARK(BM_acos_hwy_contrib); BENCHMARK(BM_asin_hwy); BENCHMARK(BM_asin_hwy_contrib); BENCHMARK(BM_exp_hwy); BENCHMARK(BM_exp_hwy_accurate); BENCHMARK(BM_exp_hwy_bounds); BENCHMARK(BM_exp_hwy_contrib); BENCHMARK(BM_exp2_hwy); BENCHMARK(BM_exp2_hwy_contrib); BENCHMARK(BM_exp10_hwy); BENCHMARK(BM_expm1_hwy); BENCHMARK(BM_log_hwy); BENCHMARK(BM_log_hwy_accurate); BENCHMARK(BM_log_hwy_contrib); BENCHMARK(BM_log2_hwy); BENCHMARK(BM_log2_hwy_contrib); BENCHMARK(BM_log10_hwy); BENCHMARK(BM_log10_hwy_contrib); BENCHMARK(BM_log1p_hwy); BENCHMARK(BM_cbrt_hwy); BENCHMARK(BM_cbrt_hwy_accurate); BENCHMARK(BM_frexp_hwy); BENCHMARK(BM_ldexp_hwy); BENCHMARK(BM_tanh_hwy); BENCHMARK(BM_erf_hwy); BENCHMARK(BM_atan2_hwy); BENCHMARK(BM_atan2_hwy_contrib); BENCHMARK(BM_hypot_hwy); BENCHMARK(BM_hypot_hwy_bounds); #else // !__has_include("hwy/highway.h") int main() { std::cout << "Highway not available, skipping benchmarks." << std::endl; return 0; } #endif // __has_include("hwy/highway.h") ================================================ FILE: benchmarks/fast_math/neon_benchmarks.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include "benchmark_helpers.h" #if defined(__aarch64__) namespace dfm = dispenso::fast_math; namespace bench = dispenso::fast_math::bench; using Flt = float32x4_t; // --- One-arg benchmarks --- void BM_sin_neon(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::sin(x); }); } void BM_cos_neon(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::cos(x); }); } void BM_tan_neon(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::tan(x); }); } void BM_atan_neon(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::atan(x); }); } void BM_acos_neon(benchmark::State& state) { bench::runBench(state, bench::acosInputs(), [](auto x) { return dfm::acos(x); }); } void BM_asin_neon(benchmark::State& state) { bench::runBench(state, bench::acosInputs(), [](auto x) { return dfm::asin(x); }); } void BM_exp_neon(benchmark::State& state) { bench::runBench(state, bench::expInputs(), [](auto x) { return dfm::exp(x); }); } void BM_exp2_neon(benchmark::State& state) { bench::runBench(state, bench::expInputs(), [](auto x) { return dfm::exp2(x); }); } void BM_exp10_neon(benchmark::State& state) { bench::runBench(state, bench::expInputs(), [](auto x) { return dfm::exp10(x); }); } void BM_expm1_neon(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::expm1(x); }); } void BM_log_neon(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::log(x); }); } void BM_log2_neon(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::log2(x); }); } void BM_log10_neon(benchmark::State& state) { bench::runBench(state, 
bench::logInputs(), [](auto x) { return dfm::log10(x); }); } void BM_log1p_neon(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](Flt x) { return dfm::log1p(vabsq_f32(x)); }); } void BM_cbrt_neon(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::cbrt(x); }); } void BM_frexp_neon(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { dfm::IntType_t e; return dfm::frexp(x, &e); }); } void BM_ldexp_neon(benchmark::State& state) { bench::runBench( state, bench::logInputs(), [](auto x) { return dfm::ldexp(x, vdupq_n_s32(3)); }); } void BM_tanh_neon(benchmark::State& state) { bench::runBench(state, bench::tanhInputs(), [](auto x) { return dfm::tanh(x); }); } void BM_erf_neon(benchmark::State& state) { bench::runBench(state, bench::erfInputs(), [](auto x) { return dfm::erf(x); }); } // --- Two-arg benchmarks --- void BM_atan2_neon(benchmark::State& state) { bench::runBench2(state, bench::expInputs(), bench::sinInputs(), [](auto y, auto x) { return dfm::atan2(y, x); }); } void BM_hypot_neon(benchmark::State& state) { bench::runBench2(state, bench::hypotInputs(), bench::sinInputs(), [](auto x, auto y) { return dfm::hypot(x, y); }); } void BM_hypot_neon_bounds(benchmark::State& state) { bench::runBench2(state, bench::hypotInputs(), bench::sinInputs(), [](auto x, auto y) { return dfm::hypot(x, y); }); } void BM_pow_neon(benchmark::State& state) { bench::runBench2( state, bench::powBaseInputs(), bench::powExpInputs(), [](auto b, auto e) { return dfm::pow(b, e); }); } void BM_pow_neon_accurate(benchmark::State& state) { bench::runBench2( state, bench::powBaseInputs(), bench::powExpInputs(), [](auto b, auto e) { return dfm::pow(b, e); }); } void BM_pow_neon_scalar_exp(benchmark::State& state) { bench::runBench(state, bench::powBaseInputs(), [](auto x) { return dfm::pow(x, 2.5f); }); } // --- Libc-packed comparison (NEON-specific, kept hand-written) --- void BM_pow_libc_neon(benchmark::State& state) { const auto& bases = bench::powBaseInputs(); const auto& exps = bench::powExpInputs(); size_t idx = 0; Flt sum = vdupq_n_f32(0.0f); for (auto _ : state) { (void)_; alignas(16) float x[4], y[4], r[4]; vst1q_f32(x, bases[idx]); vst1q_f32(y, exps[idx]); for (int32_t i = 0; i < 4; ++i) { r[i] = ::powf(x[i], y[i]); } sum = vaddq_f32(sum, vld1q_f32(r)); idx = (idx + 1) & bench::kInputsMask; } state.SetItemsProcessed(state.iterations() * 4); bench::consumeResult(sum); } // --- Registrations --- BENCHMARK(BM_sin_neon); BENCHMARK(BM_cos_neon); BENCHMARK(BM_tan_neon); BENCHMARK(BM_atan_neon); BENCHMARK(BM_acos_neon); BENCHMARK(BM_asin_neon); BENCHMARK(BM_exp_neon); BENCHMARK(BM_exp2_neon); BENCHMARK(BM_exp10_neon); BENCHMARK(BM_expm1_neon); BENCHMARK(BM_log_neon); BENCHMARK(BM_log2_neon); BENCHMARK(BM_log10_neon); BENCHMARK(BM_log1p_neon); BENCHMARK(BM_cbrt_neon); BENCHMARK(BM_frexp_neon); BENCHMARK(BM_ldexp_neon); BENCHMARK(BM_tanh_neon); BENCHMARK(BM_erf_neon); BENCHMARK(BM_atan2_neon); BENCHMARK(BM_hypot_neon); BENCHMARK(BM_hypot_neon_bounds); BENCHMARK(BM_pow_neon); BENCHMARK(BM_pow_neon_accurate); BENCHMARK(BM_pow_neon_scalar_exp); BENCHMARK(BM_pow_libc_neon); #else // !defined(__aarch64__) int main() { std::cout << "AArch64 NEON not available, skipping benchmarks." << std::endl; return 0; } #endif // defined(__aarch64__) ================================================ FILE: benchmarks/fast_math/sse_benchmarks.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. 
* * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include "benchmark_helpers.h" #if defined(__SSE4_1__) namespace dfm = dispenso::fast_math; namespace bench = dispenso::fast_math::bench; using Flt = __m128; struct BoundsTraits { static constexpr bool kMaxAccuracy = false; static constexpr bool kBoundsValues = true; }; // --- One-arg benchmarks --- void BM_sin_sse(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::sin(x); }); } void BM_sin_sse_accurate(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::sin(x); }); } void BM_cos_sse(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::cos(x); }); } void BM_cos_sse_accurate(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::cos(x); }); } void BM_tan_sse(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::tan(x); }); } void BM_atan_sse(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::atan(x); }); } void BM_acos_sse(benchmark::State& state) { bench::runBench(state, bench::acosInputs(), [](auto x) { return dfm::acos(x); }); } void BM_asin_sse(benchmark::State& state) { bench::runBench(state, bench::acosInputs(), [](auto x) { return dfm::asin(x); }); } void BM_exp_sse(benchmark::State& state) { bench::runBench(state, bench::expInputs(), [](auto x) { return dfm::exp(x); }); } void BM_exp_sse_accurate(benchmark::State& state) { bench::runBench(state, bench::expInputs(), [](auto x) { return dfm::exp(x); }); } void BM_exp_sse_bounds(benchmark::State& state) { bench::runBench( state, bench::expInputs(), [](auto x) { return dfm::exp(x); }); } void BM_exp2_sse(benchmark::State& state) { bench::runBench(state, bench::expInputs(), [](auto x) { return dfm::exp2(x); }); } void BM_exp10_sse(benchmark::State& state) { bench::runBench(state, bench::expInputs(), [](auto x) { return dfm::exp10(x); }); } void BM_expm1_sse(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](auto x) { return dfm::expm1(x); }); } void BM_log_sse(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::log(x); }); } void BM_log_sse_accurate(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::log(x); }); } void BM_log2_sse(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::log2(x); }); } void BM_log10_sse(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::log10(x); }); } void BM_log1p_sse(benchmark::State& state) { bench::runBench(state, bench::sinInputs(), [](Flt x) { Flt ax = _mm_andnot_ps(_mm_set1_ps(-0.0f), x); return dfm::log1p(ax); }); } void BM_cbrt_sse(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::cbrt(x); }); } void BM_cbrt_sse_accurate(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { return dfm::cbrt(x); }); } void BM_frexp_sse(benchmark::State& state) { bench::runBench(state, bench::logInputs(), [](auto x) { dfm::IntType_t e; return dfm::frexp(x, &e); }); } void BM_ldexp_sse(benchmark::State& state) { bench::runBench( state, bench::logInputs(), [](auto x) { return dfm::ldexp(x, _mm_set1_epi32(3)); }); } void BM_tanh_sse(benchmark::State& state) { bench::runBench(state, 
bench::tanhInputs(), [](auto x) { return dfm::tanh(x); }); } void BM_erf_sse(benchmark::State& state) { bench::runBench(state, bench::erfInputs(), [](auto x) { return dfm::erf(x); }); } // --- Two-arg benchmarks --- void BM_atan2_sse(benchmark::State& state) { bench::runBench2(state, bench::expInputs(), bench::sinInputs(), [](auto y, auto x) { return dfm::atan2(y, x); }); } void BM_hypot_sse(benchmark::State& state) { bench::runBench2(state, bench::hypotInputs(), bench::sinInputs(), [](auto x, auto y) { return dfm::hypot(x, y); }); } void BM_hypot_sse_bounds(benchmark::State& state) { bench::runBench2(state, bench::hypotInputs(), bench::sinInputs(), [](auto x, auto y) { return dfm::hypot(x, y); }); } void BM_pow_sse(benchmark::State& state) { bench::runBench2( state, bench::powBaseInputs(), bench::powExpInputs(), [](auto b, auto e) { return dfm::pow(b, e); }); } void BM_pow_sse_accurate(benchmark::State& state) { bench::runBench2( state, bench::powBaseInputs(), bench::powExpInputs(), [](auto b, auto e) { return dfm::pow(b, e); }); } void BM_pow_sse_scalar_exp(benchmark::State& state) { bench::runBench(state, bench::powBaseInputs(), [](auto x) { return dfm::pow(x, 2.5f); }); } // --- Libc-packed comparisons (SSE-specific, kept hand-written) --- void BM_hypot_libc(benchmark::State& state) { const auto& inputs = bench::hypotInputs(); const auto& inputs2 = bench::sinInputs(); size_t idx = 0; Flt sum = _mm_setzero_ps(); for (auto _ : state) { (void)_; alignas(16) float x[4], y[4], r[4]; _mm_store_ps(x, inputs[idx]); _mm_store_ps(y, inputs2[idx]); r[0] = ::hypotf(x[0], y[0]); r[1] = ::hypotf(x[1], y[1]); r[2] = ::hypotf(x[2], y[2]); r[3] = ::hypotf(x[3], y[3]); sum = _mm_add_ps(sum, _mm_load_ps(r)); idx = (idx + 1) & bench::kInputsMask; } state.SetItemsProcessed(state.iterations() * 4); bench::consumeResult(sum); } void BM_pow_libc_sse(benchmark::State& state) { const auto& bases = bench::powBaseInputs(); const auto& exps = bench::powExpInputs(); size_t idx = 0; Flt sum = _mm_setzero_ps(); for (auto _ : state) { (void)_; alignas(16) float x[4], y[4], r[4]; _mm_store_ps(x, bases[idx]); _mm_store_ps(y, exps[idx]); r[0] = ::powf(x[0], y[0]); r[1] = ::powf(x[1], y[1]); r[2] = ::powf(x[2], y[2]); r[3] = ::powf(x[3], y[3]); sum = _mm_add_ps(sum, _mm_load_ps(r)); idx = (idx + 1) & bench::kInputsMask; } state.SetItemsProcessed(state.iterations() * 4); bench::consumeResult(sum); } // --- Registrations --- BENCHMARK(BM_sin_sse); BENCHMARK(BM_sin_sse_accurate); BENCHMARK(BM_cos_sse); BENCHMARK(BM_cos_sse_accurate); BENCHMARK(BM_tan_sse); BENCHMARK(BM_atan_sse); BENCHMARK(BM_acos_sse); BENCHMARK(BM_asin_sse); BENCHMARK(BM_exp_sse); BENCHMARK(BM_exp_sse_accurate); BENCHMARK(BM_exp_sse_bounds); BENCHMARK(BM_exp2_sse); BENCHMARK(BM_exp10_sse); BENCHMARK(BM_expm1_sse); BENCHMARK(BM_log_sse); BENCHMARK(BM_log_sse_accurate); BENCHMARK(BM_log2_sse); BENCHMARK(BM_log10_sse); BENCHMARK(BM_log1p_sse); BENCHMARK(BM_cbrt_sse); BENCHMARK(BM_cbrt_sse_accurate); BENCHMARK(BM_frexp_sse); BENCHMARK(BM_ldexp_sse); BENCHMARK(BM_tanh_sse); BENCHMARK(BM_erf_sse); BENCHMARK(BM_atan2_sse); BENCHMARK(BM_hypot_sse); BENCHMARK(BM_hypot_sse_bounds); BENCHMARK(BM_hypot_libc); BENCHMARK(BM_pow_sse); BENCHMARK(BM_pow_sse_accurate); BENCHMARK(BM_pow_sse_scalar_exp); BENCHMARK(BM_pow_libc_sse); #else // !defined(__SSE4_1__) int main() { std::cout << "SSE4.1 not available, skipping benchmarks." 
<< std::endl; return 0; } #endif // defined(__SSE4_1__) ================================================ FILE: benchmarks/for_each_benchmark.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * Benchmarks for dispenso::for_each_n / dispenso::for_each. * Tests scheduling overhead across different container/iterator types: * - vector (random-access) * - deque (random-access) * - list (bidirectional) * - set (bidirectional, const elements) */ #include #include #include #include #include #include "thread_benchmark_common.h" static uint32_t kSeed(8); static constexpr int kSmallSize = 1000; static constexpr int kMediumSize = 1000000; static constexpr int kLargeSize = 100000000; const std::vector& getInputs(int num_elements) { static std::unordered_map> vecs; auto it = vecs.find(num_elements); if (it != vecs.end()) { return it->second; } srand(kSeed); std::vector values; values.reserve(num_elements); for (int i = 0; i < num_elements; ++i) { values.push_back((rand() & 255) - 127); } auto res = vecs.emplace(num_elements, std::move(values)); assert(res.second); return res.first->second; } void checkResults(const std::vector& input, const std::vector& output) { for (size_t i = 0; i < input.size(); ++i) { if (output[i] != input[i] * input[i] - 3 * input[i]) { std::cerr << "FAIL! " << output[i] << " vs " << input[i] * input[i] - 3 * input[i] << std::endl; abort(); } } } template void BM_serial(benchmark::State& state) { std::vector output(num_elements, 0); auto& input = getInputs(num_elements); for (auto UNUSED_VAR : state) { for (size_t i = 0; i < num_elements; ++i) { output[i] = input[i] * input[i] - 3 * input[i]; } } } void BM_for_each_n(benchmark::State& state) { const int num_threads = state.range(0) - 1; const int num_elements = state.range(1); std::vector output(num_elements, 0); dispenso::ThreadPool pool(num_threads); auto& input = getInputs(num_elements); for (auto UNUSED_VAR : state) { dispenso::TaskSet tasks(pool); dispenso::for_each_n( tasks, input.begin(), static_cast(num_elements), [&output, &input](const int& val) { size_t idx = static_cast(&val - input.data()); output[idx] = val * val - 3 * val; }); } checkResults(input, output); } void BM_for_each_n_deque(benchmark::State& state) { const int num_threads = state.range(0) - 1; const int num_elements = state.range(1); auto& input = getInputs(num_elements); std::deque deq(input.begin(), input.end()); dispenso::ThreadPool pool(num_threads); std::atomic sum(0); for (auto UNUSED_VAR : state) { sum.store(0, std::memory_order_relaxed); dispenso::TaskSet tasks(pool); dispenso::for_each_n( tasks, deq.begin(), static_cast(num_elements), [&sum](const int& val) { sum.fetch_add(val * val - 3 * val, std::memory_order_relaxed); }); } benchmark::DoNotOptimize(sum.load()); } void BM_for_each_n_list(benchmark::State& state) { const int num_threads = state.range(0) - 1; const int num_elements = state.range(1); auto& input = getInputs(num_elements); std::list lst(input.begin(), input.end()); dispenso::ThreadPool pool(num_threads); std::atomic sum(0); for (auto UNUSED_VAR : state) { sum.store(0, std::memory_order_relaxed); dispenso::TaskSet tasks(pool); dispenso::for_each_n( tasks, lst.begin(), static_cast(num_elements), [&sum](const int& val) { sum.fetch_add(val * val - 3 * val, std::memory_order_relaxed); }); } benchmark::DoNotOptimize(sum.load()); } void 
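// A minimal usage sketch for the pattern benchmarked above (hedged: the pool size,
// container, and reduction below are illustrative assumptions, not part of this file).
// dispenso::for_each_n takes a TaskSet, a begin iterator, an element count, and a
// callable; the TaskSet waits for any outstanding work when it is destroyed:
//   dispenso::ThreadPool pool(8);
//   std::vector<int> data(1000, 1);
//   std::atomic<int64_t> sum(0);
//   dispenso::TaskSet tasks(pool);
//   dispenso::for_each_n(tasks, data.begin(), data.size(), [&sum](const int& v) {
//     sum.fetch_add(v, std::memory_order_relaxed);
//   });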
BM_for_each_n_set(benchmark::State& state) { const int num_threads = state.range(0) - 1; const int num_elements = state.range(1); auto& input = getInputs(num_elements); std::set s(input.begin(), input.end()); // set deduplicates, so actual size may be smaller size_t actual_size = s.size(); dispenso::ThreadPool pool(num_threads); std::atomic sum(0); for (auto UNUSED_VAR : state) { sum.store(0, std::memory_order_relaxed); dispenso::TaskSet tasks(pool); dispenso::for_each_n(tasks, s.begin(), actual_size, [&sum](const int& val) { sum.fetch_add(val * val - 3 * val, std::memory_order_relaxed); }); } benchmark::DoNotOptimize(sum.load()); } static void CustomArguments(benchmark::internal::Benchmark* b) { for (int j : {kSmallSize, kMediumSize, kLargeSize}) { for (int i : pow2HalfStepThreads()) { b->Args({i, j}); } } } // Smaller argument set for containers where 100M elements is impractical static void SmallArguments(benchmark::internal::Benchmark* b) { for (int j : {kSmallSize, kMediumSize}) { for (int i : pow2HalfStepThreads()) { b->Args({i, j}); } } } BENCHMARK_TEMPLATE(BM_serial, kSmallSize); BENCHMARK_TEMPLATE(BM_serial, kMediumSize); BENCHMARK_TEMPLATE(BM_serial, kLargeSize); BENCHMARK(BM_for_each_n)->Apply(CustomArguments)->UseRealTime(); BENCHMARK(BM_for_each_n_deque)->Apply(CustomArguments)->UseRealTime(); BENCHMARK(BM_for_each_n_list)->Apply(SmallArguments)->UseRealTime(); BENCHMARK(BM_for_each_n_set)->Apply(SmallArguments)->UseRealTime(); BENCHMARK_MAIN(); ================================================ FILE: benchmarks/for_latency_benchmark.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include #include #include #if defined(_OPENMP) #include #endif #include #include #if !defined(BENCHMARK_WITHOUT_TBB) #include "tbb/blocked_range.h" #include "tbb/parallel_for.h" #include "tbb_compat.h" #endif // !BENCHMARK_WITHOUT_TBB #include "thread_benchmark_common.h" namespace { using namespace std::chrono_literals; uint32_t kSeed(8); constexpr int kSize = 50000; constexpr auto kSleep = 30ms; } // namespace // Adapted from Google gtest examples // Returns true iff n is a prime number. bool isPrime(int n) { // Trivial case 1: small numbers if (n <= 1) return false; // Trivial case 2: even numbers if (n % 2 == 0) return n == 2; // Now, we have that n is odd and n >= 3. // Try to divide n by every odd number i, starting from 3 for (int i = 3;; i += 2) { // We only have to try i up to the squre root of n if (i > n / i) break; // Now, we have i <= n/i < n. // If n is divisible by i, n is not prime. if (n % i == 0) return false; } // n has no integer factor in the range (1, n), and thus is prime. 
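// (Bound note: the loop condition uses i > n / i rather than i * i > n; both express
//  the same "i exceeds sqrt(n)" cutoff, but the division form cannot overflow.)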
return true; } const std::vector& getInputs(int numElements) { static std::unordered_map> vecs; auto it = vecs.find(numElements); if (it != vecs.end()) { return it->second; } std::mt19937_64 gen64(kSeed); std::uniform_int_distribution<> distribution(100000, 1000000); std::vector values; values.reserve(numElements); for (int i = 0; i < numElements; ++i) { values.push_back(distribution(gen64)); } auto res = vecs.emplace(numElements, std::move(values)); assert(res.second); return res.first->second; } void BM_serial(benchmark::State& state) { std::vector output(kSize, 0); auto& input = getInputs(kSize); std::vector times; times.reserve(1000); for (auto UNUSED_VAR : state) { std::this_thread::sleep_for(kSleep); times.push_back(dispenso::getTime()); for (size_t i = 0; i < kSize; ++i) { output[i] = isPrime(input[i]); } times.back() = dispenso::getTime() - times.back(); } doStats(times, state); } void BM_dispenso(benchmark::State& state) { const int numThreads = state.range(0) - 1; std::vector output(kSize, 0); dispenso::resizeGlobalThreadPool(numThreads); std::vector times; times.reserve(1000); auto& input = getInputs(kSize); for (auto UNUSED_VAR : state) { std::this_thread::sleep_for(kSleep); times.push_back(dispenso::getTime()); dispenso::parallel_for( dispenso::makeChunkedRange(0, kSize), [&input, &output](size_t i, size_t e) { for (; i != e; ++i) { output[i] = isPrime(input[i]); } }); times.back() = dispenso::getTime() - times.back(); } doStats(times, state); } #if defined(_OPENMP) void BM_omp(benchmark::State& state) { const int numThreads = state.range(0); std::vector output(kSize, 0); omp_set_num_threads(numThreads); std::vector times; times.reserve(1000); auto& input = getInputs(kSize); for (auto UNUSED_VAR : state) { std::this_thread::sleep_for(kSleep); times.push_back(dispenso::getTime()); #pragma omp parallel for for (int i = 0; i < kSize; ++i) { output[i] = isPrime(input[i]); } times.back() = dispenso::getTime() - times.back(); } doStats(times, state); } #endif /*defined(_OPENMP)*/ #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb(benchmark::State& state) { const int numThreads = state.range(0); std::vector output(kSize, 0); tbb_compat::task_scheduler_init initsched(numThreads); std::vector times; times.reserve(1000); auto& input = getInputs(kSize); for (auto UNUSED_VAR : state) { std::this_thread::sleep_for(kSleep); times.push_back(dispenso::getTime()); tbb::parallel_for( tbb::blocked_range(0, kSize), [&input, &output](const tbb::blocked_range& r) { for (size_t i = r.begin(); i < r.end(); ++i) { output[i] = isPrime(input[i]); } }); times.back() = dispenso::getTime() - times.back(); } doStats(times, state); } #endif // !BENCHMARK_WITHOUT_TBB static void CustomArguments(benchmark::internal::Benchmark* b) { for (int i : pow2HalfStepThreads()) { b->Arg(i); } } BENCHMARK(BM_serial)->UseRealTime(); #if defined(_OPENMP) BENCHMARK(BM_omp)->Apply(CustomArguments)->UseRealTime(); #endif // OPENMP #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb)->Apply(CustomArguments)->UseRealTime(); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso)->Apply(CustomArguments)->UseRealTime(); BENCHMARK_MAIN(); ================================================ FILE: benchmarks/future_benchmark.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
*/ #include #include #include #include #include #include #if !defined(BENCHMARK_WITHOUT_FOLLY) #include #include #endif // !BENCHMARK_WITHOUT_FOLLY #include "thread_benchmark_common.h" constexpr size_t kSmallSize = 13; constexpr size_t kMediumSize = 16; constexpr size_t kLargeSize = 19; // Note that there are many optimizations that could be made for these tree build routines. The // goal was to make these as apples-to-apples as possible. struct Node { Node* left; Node* right; uint32_t value; void setValue(uint32_t unique_bitset, uint32_t modulo) { value = 0; for (uint32_t i = 0; i < 32; ++i) { value += unique_bitset % modulo; unique_bitset /= modulo; } } }; class Allocator { public: void reset(size_t depth) { nodes_.resize(std::pow(2, depth) - 1); next_.store(0, std::memory_order_release); } Node* alloc() { size_t cur = next_.fetch_add(1, std::memory_order_relaxed); return &nodes_[cur]; } private: std::vector nodes_; std::atomic next_{0}; }; const std::vector& getModulos() { static const std::vector modulos = []() { std::mt19937 mt; std::uniform_int_distribution<> dis(2, 55); std::vector m; for (size_t i = 0; i < 64; ++i) { m.emplace_back(dis(mt)); } return m; }(); return modulos; } uint64_t sumTree(Node* root) { if (!root) { return 0; } return root->value + sumTree(root->left) + sumTree(root->right); } void checkTree(Node* root, uint32_t depth, uint32_t modulo) { uint64_t expectedSum = 0; uint32_t num = std::pow(2, depth); for (uint32_t i = 0; i < num; ++i) { auto bitset = i; while (bitset) { expectedSum += bitset % modulo; bitset /= modulo; } } uint64_t actual = sumTree(root); if (actual != expectedSum) { std::cerr << "Mismatch! " << expectedSum << " vs " << actual << std::endl; std::abort(); } } Node* serialTree(Allocator& allocator, uint32_t depth, uint32_t bitset, uint32_t modulo) { --depth; Node* node = allocator.alloc(); node->setValue(bitset, modulo); if (!depth) { node->left = nullptr; node->right = nullptr; return node; } node->left = serialTree(allocator, depth, (bitset << 1), modulo); node->right = serialTree(allocator, depth, (bitset << 1) | 1, modulo); return node; } template void BM_serial_tree(benchmark::State& state) { Allocator alloc; alloc.reset(depth); getModulos(); uint32_t modulo; Node* root; size_t m = 0; for (auto UNUSED_VAR : state) { alloc.reset(depth); modulo = getModulos()[m]; root = serialTree(alloc, depth, 1, modulo); m = (m + 1 == getModulos().size()) ? 0 : m + 1; } checkTree(root, depth, modulo); } Node* stdTree(Allocator& allocator, uint32_t depth, uint32_t bitset, uint32_t modulo) { --depth; Node* node = allocator.alloc(); node->setValue(bitset, modulo); if (!depth) { node->left = nullptr; node->right = nullptr; return node; } auto left = std::async([&]() { return stdTree(allocator, depth, (bitset << 1), modulo); }); auto right = std::async([&]() { return stdTree(allocator, depth, (bitset << 1) | 1, modulo); }); node->left = left.get(); node->right = right.get(); return node; } template void BM_std_tree(benchmark::State& state) { #if defined(__APPLE__) || defined(__ANDROID__) // std::async on macOS and Android (libc++) creates a real thread per call. // At depth 16+ this spawns tens of thousands of threads recursively, // exhausting OS thread limits and hanging the process. if (depth >= kMediumSize) { state.SkipWithError("std::async hangs on this platform at this tree depth"); return; } #elif defined(_WIN32) // std::async on Windows uses a threadpool. 
The recursive tree construction // pattern causes deadlock as threadpool threads wait for tasks that require // more threadpool threads. Skip all depths on Windows. state.SkipWithError("std::async deadlocks on Windows threadpool"); return; #endif Allocator alloc; alloc.reset(depth); getModulos(); uint32_t modulo; Node* root; size_t m = 0; for (auto UNUSED_VAR : state) { alloc.reset(depth); modulo = getModulos()[m]; root = stdTree(alloc, depth, 1, modulo); m = (m + 1 == getModulos().size()) ? 0 : m + 1; } checkTree(root, depth, modulo); } Node* dispensoTree(Allocator& allocator, uint32_t depth, uint32_t bitset, uint32_t modulo) { --depth; Node* node = allocator.alloc(); node->setValue(bitset, modulo); if (!depth) { node->left = nullptr; node->right = nullptr; return node; } auto left = dispenso::async( [=, &allocator]() { return dispensoTree(allocator, depth, (bitset << 1), modulo); }); auto right = dispenso::async( [=, &allocator]() { return dispensoTree(allocator, depth, (bitset << 1) | 1, modulo); }); node->left = left.get(); node->right = right.get(); return node; } template void BM_dispenso_tree(benchmark::State& state) { Allocator alloc; alloc.reset(depth); getModulos(); dispenso::globalThreadPool(); uint32_t modulo; Node* root; size_t m = 0; for (auto UNUSED_VAR : state) { alloc.reset(depth); modulo = getModulos()[m]; root = dispensoTree(alloc, depth, 1, modulo); m = (m + 1 == getModulos().size()) ? 0 : m + 1; } checkTree(root, depth, modulo); } #if !defined(BENCHMARK_WITHOUT_FOLLY) folly::SemiFuture follyTree( folly::Executor* exec, Node* node, Allocator* allocator, uint32_t depth, uint32_t bitset, uint32_t modulo) { --depth; node->setValue(bitset, modulo); if (!depth) { node->left = nullptr; node->right = nullptr; return folly::Unit{}; } node->left = allocator->alloc(); node->right = allocator->alloc(); return folly::via( exec, [=]() { return folly::collectAll( follyTree(exec, node->left, allocator, depth, bitset << 1, modulo), follyTree(exec, node->right, allocator, depth, bitset << 1 | 1, modulo)) .unit(); }) .semi(); } template void BM_folly_tree(benchmark::State& state) { folly::CPUThreadPoolExecutor follyExec{std::thread::hardware_concurrency()}; Allocator alloc; alloc.reset(depth); uint32_t modulo; Node root; size_t m = 0; for (auto UNUSED_VAR : state) { alloc.reset(depth); modulo = getModulos()[m]; follyTree(&follyExec, &root, &alloc, depth, 1, modulo).via(&follyExec).get(); m = (m + 1 == getModulos().size()) ? 
0 : m + 1; } checkTree(&root, depth, modulo); } #endif // !BENCHMARK_WITHOUT_FOLLY void dispensoTaskSetTree( dispenso::ConcurrentTaskSet& tasks, Node* node, Allocator& allocator, uint32_t depth, uint32_t bitset, uint32_t modulo) { node->setValue(bitset, modulo); --depth; if (!depth) { node->left = nullptr; node->right = nullptr; return; } tasks.schedule( [&tasks, &allocator, node, depth, bitset, modulo]() { node->left = allocator.alloc(); dispensoTaskSetTree(tasks, node->left, allocator, depth, (bitset << 1), modulo); }, true); tasks.schedule( [&tasks, &allocator, node, depth, bitset, modulo]() { node->right = allocator.alloc(); dispensoTaskSetTree(tasks, node->right, allocator, depth, (bitset << 1) | 1, modulo); }, true); } template void BM_dispenso_taskset_tree(benchmark::State& state) { Allocator alloc; alloc.reset(depth); getModulos(); uint32_t modulo; Node root; dispenso::ConcurrentTaskSet tasks( dispenso::globalThreadPool(), dispenso::ParentCascadeCancel::kOff, 2); size_t m = 0; for (auto UNUSED_VAR : state) { alloc.reset(depth); modulo = getModulos()[m]; dispensoTaskSetTree(tasks, &root, alloc, depth, 1, modulo); tasks.wait(); m = (m + 1 == getModulos().size()) ? 0 : m + 1; } checkTree(&root, depth, modulo); } void dispensoTaskSetTreeBulk( dispenso::ConcurrentTaskSet& tasks, Node* node, Allocator& allocator, uint32_t depth, uint32_t bitset, uint32_t modulo) { node->setValue(bitset, modulo); --depth; if (!depth) { node->left = nullptr; node->right = nullptr; return; } std::array children = {&node->left, &node->right}; tasks.scheduleBulk(2, [&tasks, &allocator, children, depth, bitset, modulo](size_t i) { return [&tasks, &allocator, child = children[i], depth, bitset = (bitset << 1) | static_cast(i), modulo]() { *child = allocator.alloc(); dispensoTaskSetTreeBulk(tasks, *child, allocator, depth, bitset, modulo); }; }); } template void BM_dispenso_taskset_tree_bulk(benchmark::State& state) { Allocator alloc; alloc.reset(depth); getModulos(); uint32_t modulo; Node root; dispenso::ConcurrentTaskSet tasks( dispenso::globalThreadPool(), dispenso::ParentCascadeCancel::kOff, 2); size_t m = 0; for (auto UNUSED_VAR : state) { alloc.reset(depth); modulo = getModulos()[m]; dispensoTaskSetTreeBulk(tasks, &root, alloc, depth, 1, modulo); tasks.wait(); m = (m + 1 == getModulos().size()) ? 0 : m + 1; } checkTree(&root, depth, modulo); } dispenso::Future dispensoTreeWhenAll(Allocator& allocator, uint32_t depth, uint32_t bitset, uint32_t modulo) { --depth; Node* node = allocator.alloc(); node->setValue(bitset, modulo); if (!depth) { node->left = nullptr; node->right = nullptr; return dispenso::make_ready_future(node); } auto left = dispenso::async([depth, bitset, modulo, &allocator]() { return dispensoTreeWhenAll(allocator, depth, (bitset << 1), modulo); }); auto right = dispenso::async([depth, bitset, modulo, &allocator]() { return dispensoTreeWhenAll(allocator, depth, (bitset << 1) | 1, modulo); }); return dispenso::when_all(left, right).then([node](auto&& both) { auto& tuple = both.get(); node->left = std::get<0>(tuple).get().get(); node->right = std::get<1>(tuple).get().get(); return node; }); } template void BM_dispenso_tree_when_all(benchmark::State& state) { Allocator alloc; alloc.reset(depth); getModulos(); dispenso::globalThreadPool(); uint32_t modulo; Node* root; size_t m = 0; for (auto UNUSED_VAR : state) { alloc.reset(depth); modulo = getModulos()[m]; root = dispensoTreeWhenAll(alloc, depth, 1, modulo).get(); m = (m + 1 == getModulos().size()) ? 
0 : m + 1; } checkTree(root, depth, modulo); } BENCHMARK_TEMPLATE(BM_serial_tree, kSmallSize)->UseRealTime(); BENCHMARK_TEMPLATE(BM_serial_tree, kMediumSize)->UseRealTime(); BENCHMARK_TEMPLATE(BM_serial_tree, kLargeSize)->UseRealTime(); BENCHMARK_TEMPLATE(BM_std_tree, kSmallSize)->UseRealTime(); BENCHMARK_TEMPLATE(BM_std_tree, kMediumSize)->UseRealTime(); BENCHMARK_TEMPLATE(BM_std_tree, kLargeSize)->UseRealTime(); #if !defined(BENCHMARK_WITHOUT_FOLLY) BENCHMARK_TEMPLATE(BM_folly_tree, kSmallSize)->UseRealTime(); BENCHMARK_TEMPLATE(BM_folly_tree, kMediumSize)->UseRealTime(); BENCHMARK_TEMPLATE(BM_folly_tree, kLargeSize)->UseRealTime(); #endif // !BENCHMARK_WITHOUT_FOLLY BENCHMARK_TEMPLATE(BM_dispenso_tree, kSmallSize)->UseRealTime(); BENCHMARK_TEMPLATE(BM_dispenso_tree, kMediumSize)->UseRealTime(); BENCHMARK_TEMPLATE(BM_dispenso_tree, kLargeSize)->UseRealTime(); BENCHMARK_TEMPLATE(BM_dispenso_taskset_tree, kSmallSize)->UseRealTime(); BENCHMARK_TEMPLATE(BM_dispenso_taskset_tree, kMediumSize)->UseRealTime(); BENCHMARK_TEMPLATE(BM_dispenso_taskset_tree, kLargeSize)->UseRealTime(); BENCHMARK_TEMPLATE(BM_dispenso_taskset_tree_bulk, kSmallSize)->UseRealTime(); BENCHMARK_TEMPLATE(BM_dispenso_taskset_tree_bulk, kMediumSize)->UseRealTime(); BENCHMARK_TEMPLATE(BM_dispenso_taskset_tree_bulk, kLargeSize)->UseRealTime(); BENCHMARK_TEMPLATE(BM_dispenso_tree_when_all, kSmallSize)->UseRealTime(); BENCHMARK_TEMPLATE(BM_dispenso_tree_when_all, kMediumSize)->UseRealTime(); BENCHMARK_TEMPLATE(BM_dispenso_tree_when_all, kLargeSize)->UseRealTime(); BENCHMARK_MAIN(); ================================================ FILE: benchmarks/graph_benchmark.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
*/ #include #include #include #include #include #include #include //-------------------------------------------------------------------------------- // Taskflow //-------------------------------------------------------------------------------- class TaskFlowBigTree { public: static size_t sizeOfLevel(size_t level) { return 1ul << (numBits_ - shiftStep_ * level); } void buildTree() { for (size_t level = 1; level < numLevels_; ++level) { buildLevel(level); } } void allocateMemory() { // the goal of this task is to calculate the sum of the numbers in this array for (size_t level = 1; level < numLevels_; ++level) { const size_t s = sizeOfLevel(level); data_[level].resize(s, 0lu); } // zero level data is input data for calculation data_[0].resize(1ul << numBits_); std::iota(data_[0].begin(), data_[0].end(), 0); } void initData() { for (size_t level = 1; level < numLevels_; ++level) { std::fill(data_[level].begin(), data_[level].end(), 0.f); } std::iota(data_[0].begin(), data_[0].end(), 0); } void buildLevel(size_t level) { std::vector& tasks = levelTasks_[level]; const size_t numNodes = 1ul << (numBits_ - shiftStep_ * level); for (size_t n = 0; n < numNodes; ++n) { tasks.push_back(taskflow_.emplace([this, level, n]() { for (size_t j = 0; j < numPredecessors_; ++j) { const size_t index = (n << shiftStep_) | j; data_[level][n] += data_[level - 1][index]; } })); if (level > 1) { for (size_t j = 0; j < numPredecessors_; ++j) { const size_t index = (n << shiftStep_) | j; levelTasks_[level][n].succeed(levelTasks_[level - 1][index]); } } } } bool testTree() { const size_t result = std::accumulate(this->data_[0].begin(), this->data_[0].end(), size_t(0)); executor_.run(taskflow_).wait(); const bool isTestPass = data_[numLevels_ - 1][0] == result; initData(); return isTestPass; } static constexpr size_t shiftStep_ = 4; // 4 static constexpr size_t numBits_ = 5 * shiftStep_; // 5 // should be divisible by shiftStep static constexpr size_t numPredecessors_ = 1 << shiftStep_; static constexpr size_t startShnft_ = shiftStep_; static constexpr size_t numLevels_ = numBits_ / shiftStep_ + 1; static constexpr size_t level0Size_ = 1ul << numBits_; std::array, numLevels_> data_; std::array, numLevels_> levelTasks_; tf::Taskflow taskflow_; tf::Executor executor_; }; //-------------------------------------------------------------------------------- // dispenso::Graph //-------------------------------------------------------------------------------- template class BigTree { public: static size_t sizeOfLevel(size_t level) { return 1ul << (numBits_ - shiftStep_ * level); } void buildTree() { for (size_t level = 1; level < numLevels_; ++level) { subgraphs_[level] = &g_.addSubgraph(); buildLevel(level); } } void allocateMemory() { // the goal of this task is to calculate the sum of the numbers in this array for (size_t level = 1; level < numLevels_; ++level) { const size_t s = sizeOfLevel(level); data_[level].resize(s, 0lu); } // zero level data is input data for calculation data_[0].resize(1ul << numBits_); std::iota(data_[0].begin(), data_[0].end(), 0); } void initData() { for (size_t level = 1; level < numLevels_; ++level) { std::fill(data_[level].begin(), data_[level].end(), 0.f); } std::iota(data_[0].begin(), data_[0].end(), 0); } void buildLevel(size_t level) { SubGraphType* subgraph = subgraphs_[level]; const size_t numNodes = 1ul << (numBits_ - shiftStep_ * level); subgraph->reserve(numNodes); for (size_t n = 0; n < numNodes; ++n) { subgraph->addNode([this, level, n]() { for (size_t j = 0; j < numPredecessors_; 
++j) { const size_t index = (n << shiftStep_) | j; data_[level][n] += data_[level - 1][index]; } }); if (level > 1) { for (size_t j = 0; j < numPredecessors_; ++j) { const size_t index = (n << shiftStep_) | j; subgraphs_[level]->node(n).dependsOn(subgraphs_[level - 1]->node(index)); } } } } bool testTree() { const size_t result = std::accumulate(this->data_[0].begin(), this->data_[0].end(), size_t(0)); dispenso::ThreadPool& threadPool = dispenso::globalThreadPool(); dispenso::ConcurrentTaskSet concurrentTaskSet(threadPool); setAllNodesIncomplete(g_); executor_(concurrentTaskSet, g_); concurrentTaskSet.wait(); const bool isTestPass = data_[numLevels_ - 1][0] == result; initData(); return isTestPass; } static constexpr size_t shiftStep_ = 4; // 4 static constexpr size_t numBits_ = 5 * shiftStep_; // 5 // should be divisible by shiftStep static constexpr size_t numPredecessors_ = 1 << shiftStep_; static constexpr size_t startShnft_ = shiftStep_; static constexpr size_t numLevels_ = numBits_ / shiftStep_ + 1; static constexpr size_t level0Size_ = 1ul << numBits_; using SubGraphType = typename G::SubgraphType; std::array, numLevels_> data_; std::array subgraphs_; G g_; dispenso::ConcurrentTaskSetExecutor executor_; }; static void BM_taskflow_build_big_tree(benchmark::State& state) { TaskFlowBigTree bigTree; bigTree.allocateMemory(); for (auto _ : state) { bigTree.buildTree(); assert(bigTree.testTree()); bigTree.taskflow_.clear(); for (uint32_t i = 0; i < TaskFlowBigTree::numLevels_; ++i) { bigTree.levelTasks_[i].clear(); } } } template static void BM_build_big_tree(benchmark::State& state) { BigTree bigTree; bigTree.allocateMemory(); for (auto _ : state) { bigTree.buildTree(); assert(bigTree.testTree()); bigTree.g_.clear(); } } static void BM_build_bi_prop_dependency_chain(benchmark::State& state) { size_t counter; for (auto _ : state) { dispenso::BiPropGraph graph; dispenso::BiPropNode* prevNode = &graph.addNode([&counter]() { counter++; }); for (size_t i = 1; i < 1024; ++i) { dispenso::BiPropNode* node = &graph.addNode([&counter]() { counter++; }); prevNode->biPropDependsOn(*node); prevNode = node; } } } template static void BM_build_dependency_chain(benchmark::State& state) { size_t counter; for (auto _ : state) { G graph; typename G::NodeType* prevNode = &graph.addNode([&counter]() { counter++; }); for (size_t i = 1; i < 1024; ++i) { typename G::NodeType* node = &graph.addNode([&counter]() { counter++; }); prevNode->dependsOn(*node); prevNode = node; } } } template static void BM_execute_dependency_chain(benchmark::State& state) { size_t counter; G graph; typename G::NodeType* prevNode = &graph.addNode([&counter]() { counter++; }); for (size_t i = 1; i < 1024; ++i) { typename G::NodeType* node = &graph.addNode([&counter]() { counter++; }); prevNode->dependsOn(*node); prevNode = node; } dispenso::SingleThreadExecutor singleThreadExecutor; for (auto _ : state) { setAllNodesIncomplete(graph); counter = 0; singleThreadExecutor(graph); } } static void BM_build_bi_prop_dependency_group(benchmark::State& state) { size_t counter; constexpr size_t numNodes = 4; std::array nodes1, nodes2; for (auto _ : state) { dispenso::BiPropGraph graph; for (size_t i = 0; i < numNodes; ++i) { nodes1[i] = &graph.addNode([&counter]() { counter++; }); nodes2[i] = &graph.addNode([&counter]() { counter++; }); nodes1[i]->biPropDependsOn(*nodes2[i]); } for (size_t i = 1; i < numNodes; ++i) { nodes1[i - 1]->biPropDependsOn(*nodes1[i]); } } } template static void BM_forward_propagator_node(benchmark::State& state) { G 
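// Sketch of what follows: build a random DAG of 32768 nodes in which each node depends
// on one earlier node, so every node transitively depends on nodes[0]. Each timed
// iteration marks all nodes completed, flips only the root to incomplete, and measures
// how quickly ForwardPropagator re-marks the root's transitive dependents.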
graph; constexpr size_t numNodes = 32768; std::array nodes; size_t counter; std::mt19937 rng(12345); nodes[0] = &graph.addNode([&counter]() { counter++; }); // root for (size_t i = 1; i < numNodes; ++i) { nodes[i] = &graph.addNode([&counter]() { counter++; }); std::uniform_int_distribution<> parentDistr(0, i - 1); nodes[i]->dependsOn(*nodes[parentDistr(rng)]); } dispenso::ForwardPropagator forwardPropagator; for (auto _ : state) { state.PauseTiming(); graph.forEachNode([](const dispenso::Node& node) { node.setCompleted(); }); nodes[0]->setIncomplete(); state.ResumeTiming(); forwardPropagator(graph); } } BENCHMARK(BM_taskflow_build_big_tree); BENCHMARK_TEMPLATE(BM_build_big_tree, dispenso::Graph); BENCHMARK_TEMPLATE(BM_build_big_tree, dispenso::BiPropGraph); BENCHMARK(BM_build_bi_prop_dependency_chain); BENCHMARK(BM_build_bi_prop_dependency_group); BENCHMARK_TEMPLATE(BM_build_dependency_chain, dispenso::Graph); BENCHMARK_TEMPLATE(BM_build_dependency_chain, dispenso::BiPropGraph); BENCHMARK_TEMPLATE(BM_execute_dependency_chain, dispenso::Graph); BENCHMARK_TEMPLATE(BM_execute_dependency_chain, dispenso::BiPropGraph); BENCHMARK_TEMPLATE(BM_forward_propagator_node, dispenso::Graph); BENCHMARK_TEMPLATE(BM_forward_propagator_node, dispenso::BiPropGraph); BENCHMARK_MAIN(); ================================================ FILE: benchmarks/graph_scene_benchmark.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include #include #include #include #if TF_VERSION > 300000 #include #endif // TF_VERSION #include #include #include // For this benchmarks we create set of tasks similar to the scene graph. // Scene consist of the hierarchy of the Transforms which have local space transformation matrix. // World matrix should be calculated as multiplication of the parent world matrix and local space // matrix of the transform. Some transforms have two geometry index. inGeoIdex - is index in source // geometry array (which generated by functions). This geometry should be transformed by worldMatrix // and stored into outGeo array. // generateGeo─┐ // │ // 4 │ // transform 2 ◀──────┐ transform 5 ┌─ ─┬──▼─────┬─ ─┐ // ┌─────────────┐ │ ┌───────────────┐ inGeo: │ ... │ points │ ... │ // │ parent │ └────parent 2│ └─ ─┴──┬─────┴─ ─┘ // │ matrix │ ┌──◀───matrix │ │ // │ worldMatrix─────*────▶ worldMatrix │───────▶────────────* transformGeo // │ inGeoIndex │ │ inGeoIndex 4│ 9 │ // │ outGeoIndex │ │ outGeoIndex 9│ ┌── ─┬──▼─────┬─ ─┐ // └─────────────┘ └───────────────┘ outGeo: │ ... │ points │ ... 
│ // calculateWorldMatrix └── ─┴────────┴─ ─┘ namespace { namespace params { // parameters of the scene constexpr size_t numTransforms = 1000; // number transforms in hierarchy constexpr size_t numInGeo = 100; // number unique geometries constexpr size_t numVertexMultiplier = 1024; constexpr size_t everyNthHasGeo = 2; // probability that transform has geo is 1/everyNthHasGeo } // namespace params using Vec3 = std::array; using Matrix4 = std::array; constexpr size_t kNoGeometry = std::numeric_limits::max(); constexpr float kPi = 3.14159265358979323846f; constexpr size_t kRoot = std::numeric_limits::max(); struct Transform { Matrix4 matrix; Matrix4 worldMatrix; size_t parent = kRoot; size_t inGeoIndex = kNoGeometry; size_t outGeoIndex = kNoGeometry; }; using Geometry = std::vector; struct Scene { std::vector transforms; std::vector inGeo; std::vector outGeo; }; //-------------------------------------------------------------------------------- void branchlessONB(const Vec3& n, Vec3& b1, Vec3& b2) { const float sign = std::copysign(1.f, n[2]); const float a = -1.0f / (sign + n[2]); const float b = n[0] * n[1] * a; b1 = Vec3{1.0f + sign * n[0] * n[0] * a, sign * b, -sign * n[0]}; b2 = Vec3{b, sign + n[1] * n[1] * a, -n[1]}; } Matrix4 getRandomTransformMatrix(std::mt19937& rng) { std::uniform_real_distribution thetaDistr(0.f, 2.f * kPi); std::uniform_real_distribution uDistr(-1.f, 1.f); const float theta = thetaDistr(rng); const float u = uDistr(rng); const float sq = std::sqrt(1 - u * u); const Vec3 dirY = {sq * std::cos(theta), sq * std::sin(theta), u}; Vec3 dirX, dirZ; branchlessONB(dirY, dirZ, dirX); const Vec3 pos{uDistr(rng), uDistr(rng), uDistr(rng)}; // clang-format off return {dirX[0], dirX[1], dirX[2], 0.f, dirY[0], dirY[1], dirY[2], 0.f, dirZ[0], dirZ[1], dirZ[2], 0.f, pos[0], pos[1], pos[2], 1.f }; // clang-format on } Scene generateTransformsHierarchy( size_t numTransforms, size_t numInGeo, size_t everyNthHasGeo, std::mt19937& rng) { std::uniform_int_distribution distGeom(0, numInGeo - 1); std::uniform_int_distribution distProb(0, everyNthHasGeo - 1); Scene scene; scene.inGeo.resize(numInGeo); std::vector& transforms = scene.transforms; transforms.resize(numTransforms); transforms[0].matrix = getRandomTransformMatrix(rng); transforms[0].worldMatrix = transforms[0].matrix; size_t outGeoCounter = 0; for (size_t i = 1; i < numTransforms; ++i) { Transform& transform = transforms[i]; std::uniform_int_distribution dist(0, i - 1); transform.parent = dist(rng); transform.matrix = getRandomTransformMatrix(rng); if (distProb(rng) == 0) { transform.inGeoIndex = distGeom(rng); transform.outGeoIndex = outGeoCounter++; } } scene.outGeo.resize(outGeoCounter); return scene; } //--------------------------------compute functions------------------------------- Vec3 multiply(const Vec3& v, const Matrix4& m) { const float invertW = 1.f / (m[11] * v[2] + m[7] * v[1] + m[3] * v[0] + m[15]); return { (m[8] * v[2] + m[4] * v[1] + m[0] * v[0] + m[12]) * invertW, (m[9] * v[2] + m[5] * v[1] + m[1] * v[0] + m[13]) * invertW, (m[10] * v[2] + m[6] * v[1] + m[2] * v[0] + m[14]) * invertW}; } Matrix4 multiply(const Matrix4& ma, const Matrix4& mb) { return { ma[2] * mb[8] + ma[1] * mb[4] + ma[3] * mb[12] + ma[0] * mb[0], ma[2] * mb[9] + ma[1] * mb[5] + ma[3] * mb[13] + ma[0] * mb[1], ma[1] * mb[6] + ma[0] * mb[2] + ma[3] * mb[14] + ma[2] * mb[10], ma[1] * mb[7] + ma[0] * mb[3] + ma[3] * mb[15] + ma[2] * mb[11], ma[6] * mb[8] + ma[5] * mb[4] + ma[7] * mb[12] + ma[4] * mb[0], ma[6] * mb[9] + ma[5] * mb[5] + ma[7] * 
mb[13] + ma[4] * mb[1], ma[5] * mb[6] + ma[4] * mb[2] + ma[7] * mb[14] + ma[6] * mb[10], ma[5] * mb[7] + ma[4] * mb[3] + ma[7] * mb[15] + ma[6] * mb[11], ma[10] * mb[8] + ma[9] * mb[4] + ma[11] * mb[12] + ma[8] * mb[0], ma[10] * mb[9] + ma[9] * mb[5] + ma[11] * mb[13] + ma[8] * mb[1], ma[9] * mb[6] + ma[8] * mb[2] + ma[11] * mb[14] + ma[10] * mb[10], ma[9] * mb[7] + ma[8] * mb[3] + ma[11] * mb[15] + ma[10] * mb[11], ma[14] * mb[8] + ma[13] * mb[4] + ma[15] * mb[12] + ma[12] * mb[0], ma[14] * mb[9] + ma[13] * mb[5] + ma[15] * mb[13] + ma[12] * mb[1], ma[13] * mb[6] + ma[12] * mb[2] + ma[15] * mb[14] + ma[14] * mb[10], ma[13] * mb[7] + ma[12] * mb[3] + ma[15] * mb[15] + ma[14] * mb[11]}; } void calculateWorldMatrix(std::vector& transforms, size_t index) { Transform& transform = transforms[index]; if (transform.parent == kRoot) { transform.worldMatrix = transform.matrix; } else { transform.worldMatrix = multiply(transform.matrix, transforms[transform.parent].worldMatrix); } } size_t numGeoPoints(size_t inGeoIndex) { return (inGeoIndex + 1) * params::numVertexMultiplier; } Vec3 calculateGeoPoint(size_t inGeoIndex, size_t pointIndex) { size_t numPoints = numGeoPoints(inGeoIndex); const float tStep = 2.f * 2.f * kPi / (static_cast(numPoints) - 1.f); const float t = tStep * static_cast(pointIndex); const float r = 0.01; return {r * t * std::cos(t), r * t * std::sin(t), r * (t + std::sin(16 * t))}; } //-------------------------------------------------------------------------------- // taskflow //-------------------------------------------------------------------------------- tf::Task generateGeoTF(tf::Taskflow& taskflow, std::vector& inGeo, size_t inGeoIndex) { size_t numPoints = numGeoPoints(inGeoIndex); Geometry& g = inGeo[inGeoIndex]; g.resize(numPoints); return taskflow.for_each_index( size_t(0), numPoints, size_t(1), [&](size_t i) { g[i] = calculateGeoPoint(inGeoIndex, i); }); } tf::Task transformGeoTF(tf::Taskflow& taskflow, Geometry& g, const Geometry& inG, const Matrix4& m) { const size_t numPoints = inG.size(); g.resize(numPoints); return taskflow.for_each_index( size_t(0), numPoints, size_t(1), [&](size_t i) { g[i] = multiply(inG[i], m); }); } void prepareGraphTF(tf::Taskflow& taskflow, Scene& scene) { std::mt19937 rng(12345); scene = generateTransformsHierarchy( params::numTransforms, params::numInGeo, params::everyNthHasGeo, rng); std::vector& inGeo = scene.inGeo; std::vector& transforms = scene.transforms; std::vector& outGeo = scene.outGeo; // calculate inGeo std::vector inGeoTasks(params::numInGeo); for (size_t i = 0; i < params::numInGeo; ++i) { inGeoTasks[i] = generateGeoTF(taskflow, inGeo, i); } // calculate transforms std::vector transformTasks(params::numTransforms); std::vector outGeoTasks(outGeo.size()); for (size_t i = 0; i < params::numTransforms; ++i) { transformTasks[i] = taskflow.emplace([&transforms, i]() { calculateWorldMatrix(transforms, i); }); const size_t parentIndex = transforms[i].parent; if (parentIndex != kRoot) { transformTasks[i].succeed(transformTasks[parentIndex]); } // calculate outGeo const size_t outGeoIndex = transforms[i].outGeoIndex; const size_t inGeoIndex = transforms[i].inGeoIndex; if (inGeoIndex != kNoGeometry) { outGeoTasks[outGeoIndex] = transformGeoTF( taskflow, outGeo[outGeoIndex], inGeo[inGeoIndex], transforms[i].worldMatrix); outGeoTasks[outGeoIndex].succeed(transformTasks[i], inGeoTasks[inGeoIndex]); } } } //-------------------------------------------------------------------------------- // dispenso 
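// The dispenso variant below mirrors the Taskflow graph above, except that each
// geometry node additionally nests a dispenso::parallel_for over its points. A minimal
// sketch of the node/dependency/execution pattern used here (names and sizes are
// illustrative assumptions, not taken from this file):
//   dispenso::Graph g;
//   dispenso::Node& produce = g.addNode([]() { /* produce data */ });
//   dispenso::Node& consume = g.addNode([]() { /* consume data */ });
//   consume.dependsOn(produce);
//   dispenso::ConcurrentTaskSet tasks(dispenso::globalThreadPool());
//   dispenso::ConcurrentTaskSetExecutor exec;
//   setAllNodesIncomplete(g);
//   exec(tasks, g);
//   tasks.wait();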
//-------------------------------------------------------------------------------- void generateGeo( dispenso::ThreadPool& threadPool, std::vector& inGeo, size_t inGeoIndex) { size_t numPoints = numGeoPoints(inGeoIndex); Geometry& g = inGeo[inGeoIndex]; g.resize(numPoints); dispenso::TaskSet taskSet(threadPool); dispenso::ParForOptions options; options.maxThreads = static_cast(numPoints); options.minItemsPerChunk = 256; dispenso::parallel_for( taskSet, 0, numPoints, [&](size_t i) { g[i] = calculateGeoPoint(inGeoIndex, i); }, options); } void transformGeo( dispenso::ThreadPool& threadPool, Geometry& g, const Geometry& inG, const Matrix4& m) { dispenso::TaskSet taskSet(threadPool); const size_t numPoints = inG.size(); dispenso::ParForOptions options; options.maxThreads = static_cast(numPoints); options.minItemsPerChunk = 256; g.resize(numPoints); dispenso::parallel_for( taskSet, 0, numPoints, [&](size_t i) { g[i] = multiply(inG[i], m); }, options); } struct Subgraphs { dispenso::Subgraph* inGeo; dispenso::Subgraph* transforms; dispenso::Subgraph* outGeo; }; Subgraphs prepareGraph(dispenso::ThreadPool& threadPool, Scene& scene, dispenso::Graph& g) { std::mt19937 rng(12345); scene = generateTransformsHierarchy( params::numTransforms, params::numInGeo, params::everyNthHasGeo, rng); std::vector& inGeo = scene.inGeo; std::vector& transforms = scene.transforms; std::vector& outGeo = scene.outGeo; // calculate inGeo dispenso::Subgraph& transformsSub = g.addSubgraph(); dispenso::Subgraph& inGeoSub = g.addSubgraph(); dispenso::Subgraph& outGeoSub = g.addSubgraph(); // Reserve capacity for known sizes transformsSub.reserve(params::numTransforms); inGeoSub.reserve(params::numInGeo); for (size_t i = 0; i < params::numInGeo; ++i) { inGeoSub.addNode([&threadPool, &inGeo, i]() { generateGeo(threadPool, inGeo, i); }); } // calculate transforms for (size_t i = 0; i < params::numTransforms; ++i) { transformsSub.addNode([&transforms, i]() { calculateWorldMatrix(transforms, i); }); const size_t parentIndex = transforms[i].parent; if (parentIndex != kRoot) { transformsSub.node(i).dependsOn(transformsSub.node(parentIndex)); } // calculate outGeo const size_t outGeoIndex = transforms[i].outGeoIndex; const size_t inGeoIndex = transforms[i].inGeoIndex; if (inGeoIndex != kNoGeometry) { outGeoSub.addNode([&, i, inGeoIndex, outGeoIndex]() { transformGeo(threadPool, outGeo[outGeoIndex], inGeo[inGeoIndex], transforms[i].worldMatrix); }); outGeoSub.node(outGeoIndex).dependsOn(transformsSub.node(i), inGeoSub.node(inGeoIndex)); } } return {&inGeoSub, &transformsSub, &outGeoSub}; } //----------------------------------test results---------------------------------- #ifndef NDEBUG template bool compare(const std::array& ma, const std::array& mb) { bool result = true; for (size_t i = 0; i < N; ++i) { result = result && std::abs(ma[i] - mb[i]) < std::numeric_limits::epsilon(); } return result; } bool testScene(const Scene& scene) { bool result = true; for (const Transform& transform : scene.transforms) { const Matrix4 worldMatrix = transform.parent == kRoot ? 
transform.matrix : multiply(transform.matrix, scene.transforms[transform.parent].worldMatrix); result = result && compare(transform.worldMatrix, worldMatrix); } for (const Transform& transform : scene.transforms) { const size_t outGeoIndex = transform.outGeoIndex; const size_t inGeoIndex = transform.inGeoIndex; if (inGeoIndex != kNoGeometry) { const size_t numVertices = scene.inGeo[inGeoIndex].size(); assert(numVertices == scene.outGeo[outGeoIndex].size()); for (size_t i = 0; i < numVertices; ++i) { const Vec3 worldPos = multiply(scene.inGeo[inGeoIndex][i], transform.worldMatrix); result = result && compare(scene.outGeo[outGeoIndex][i], worldPos); } } } return result; } void cleanScene(Scene& s) { for (Geometry& g : s.inGeo) { g.clear(); } for (Geometry& g : s.outGeo) { g.clear(); } for (Transform& t : s.transforms) { std::fill(t.worldMatrix.begin(), t.worldMatrix.end(), 0.f); } } #endif } // anonymous namespace static void BM_scene_graph_parallel_for(benchmark::State& state) { // transform inGeo dispenso::ThreadPool& threadPool = dispenso::globalThreadPool(); dispenso::Graph g; Scene scene; prepareGraph(threadPool, scene, g); dispenso::ParallelForExecutor parallelForExecutor; dispenso::TaskSet taskSet(threadPool); for (auto _ : state) { state.PauseTiming(); setAllNodesIncomplete(g); state.ResumeTiming(); parallelForExecutor(taskSet, g); #ifndef NDEBUG state.PauseTiming(); assert(testScene(scene)); cleanScene(scene); state.ResumeTiming(); #endif } } static void BM_scene_graph_concurrent_task_set(benchmark::State& state) { // transform inGeo dispenso::ThreadPool& threadPool = dispenso::globalThreadPool(); dispenso::Graph g; Scene scene; prepareGraph(threadPool, scene, g); dispenso::ConcurrentTaskSet concurrentTaskSet(threadPool); dispenso::ConcurrentTaskSetExecutor concurrentTaskSetExecutor; for (auto _ : state) { state.PauseTiming(); setAllNodesIncomplete(g); state.ResumeTiming(); concurrentTaskSetExecutor(concurrentTaskSet, g); concurrentTaskSet.wait(); #ifndef NDEBUG state.PauseTiming(); assert(testScene(scene)); cleanScene(scene); state.ResumeTiming(); #endif } } static void BM_scene_graph_partial_revaluation(benchmark::State& state) { dispenso::ThreadPool& threadPool = dispenso::globalThreadPool(); std::mt19937 rng(123456); dispenso::Graph g; Scene scene; Subgraphs subgraphs = prepareGraph(threadPool, scene, g); const size_t numTransforms = scene.transforms.size(); std::uniform_real_distribution transformIndexDistr(0, numTransforms - 1); dispenso::ConcurrentTaskSet concurrentTaskSet(threadPool); dispenso::ConcurrentTaskSetExecutor concurrentTaskSetExecutor; dispenso::ForwardPropagator forwardPropagator; setAllNodesIncomplete(g); concurrentTaskSetExecutor(concurrentTaskSet, g); concurrentTaskSet.wait(); assert(testScene(scene)); for (auto _ : state) { state.PauseTiming(); // change several transforms rng.seed(123456); for (size_t i = 0; i < 10; ++i) { const size_t index = transformIndexDistr(rng); Transform& t = scene.transforms[index]; t.matrix = getRandomTransformMatrix(rng); subgraphs.transforms->node(index).setIncomplete(); } state.ResumeTiming(); // The graph automatically recalculates all children of modified transforms. If these transforms // have geometry this geometry will be recomputed as well. 
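// Partial re-evaluation, step by step: the modified transform nodes were flipped with
// setIncomplete() above; ForwardPropagator then marks every transitive dependent
// incomplete as well, so the executor call below re-runs only that incomplete portion
// of the graph rather than the whole scene.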
forwardPropagator(g); concurrentTaskSetExecutor(concurrentTaskSet, g); concurrentTaskSet.wait(); #ifndef NDEBUG state.PauseTiming(); assert(testScene(scene)); state.ResumeTiming(); #endif } } // TODO(roman fedotov): Add partial evaluation variant for taskflow (possible implementation: // conditional tasks) static void BM_scene_graph_taskflow(benchmark::State& state) { tf::Taskflow taskflow; Scene scene; prepareGraphTF(taskflow, scene); tf::Executor executor; for (auto _ : state) { executor.run(taskflow).wait(); #ifndef NDEBUG state.PauseTiming(); assert(testScene(scene)); cleanScene(scene); state.ResumeTiming(); #endif } } BENCHMARK(BM_scene_graph_parallel_for)->UseRealTime(); BENCHMARK(BM_scene_graph_concurrent_task_set)->UseRealTime(); BENCHMARK(BM_scene_graph_taskflow)->UseRealTime(); #ifndef NDEBUG BENCHMARK(BM_scene_graph_partial_revaluation)->UseRealTime()->Iterations(50); #else BENCHMARK(BM_scene_graph_partial_revaluation)->UseRealTime(); #endif BENCHMARK_MAIN(); ================================================ FILE: benchmarks/idle_pool_benchmark.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * Benchmarks for thread pool behavior under low/idle workloads. * Tests CPU usage and responsiveness when threads are mostly waiting. */ #include #include #if !defined(BENCHMARK_WITHOUT_TBB) #include #include "tbb_compat.h" #endif // !BENCHMARK_WITHOUT_TBB #include "thread_benchmark_common.h" using namespace std::chrono_literals; static constexpr int kSmallSize = 1000; static constexpr int kMediumSize = 10000; static constexpr int kLargeSize = 1000000; struct alignas(64) Work { size_t count = 0; void operator+=(size_t o) { count += o; } }; Work g_work[1025]; std::atomic g_tCounter{0}; inline int testTid() { static DISPENSO_THREAD_LOCAL int t = -1; if (t < 0) { t = g_tCounter.fetch_add(1, std::memory_order_acq_rel); } return t; } inline Work& work() { static DISPENSO_THREAD_LOCAL Work* w = nullptr; if (!w) { if (testTid() == 0) { w = g_work + 1024; } else { w = g_work + (testTid() & 1023); } } return *w; } #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb_mostly_idle(benchmark::State& state) { const int num_threads = state.range(0); const int num_elements = state.range(1); struct Recurse { void operator()() const { work() += i; if (i < num_elements) { ++i; g->run(*this); } } mutable int i; mutable tbb::task_group* g; int num_elements; }; tbb_compat::task_scheduler_init initsched(num_threads); startRusage(); for (auto UNUSED_VAR : state) { tbb::task_group g; Recurse rec; rec.i = 0; rec.g = &g; rec.num_elements = num_elements; rec(); g.wait(); } endRusage(state); } void BM_tbb_very_idle(benchmark::State& state) { const int num_threads = state.range(0); tbb_compat::task_scheduler_init initsched(num_threads); startRusage(); for (auto UNUSED_VAR : state) { tbb::task_group g; g.run([]() {}); std::this_thread::sleep_for(100ms); g.run([]() {}); g.wait(); } endRusage(state); } #endif // !BENCHMARK_WITHOUT_TBB void BM_dispenso_mostly_idle(benchmark::State& state) { const int num_threads = std::max(1, state.range(0) - 1); const int num_elements = state.range(1); struct Recurse { void operator()() { work() += i; if (i < num_elements) { ++i; tasks->schedule(*this); } } int i; dispenso::ConcurrentTaskSet* tasks; int num_elements; }; dispenso::ThreadPool pool(num_threads); startRusage(); for (auto UNUSED_VAR : state) { 
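// Each iteration forms a serial chain: rec() schedules one copy of itself on the
// ConcurrentTaskSet until num_elements tasks have run, so at most one task is runnable
// at a time and the remaining pool threads sit idle; endRusage() after the loop reports
// the CPU those idle workers consumed while waiting.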
dispenso::ConcurrentTaskSet tasks(pool); Recurse rec; rec.i = 0; rec.tasks = &tasks; rec.num_elements = num_elements; rec(); tasks.wait(); } endRusage(state); } void BM_dispenso_very_idle(benchmark::State& state) { const int num_threads = state.range(0) - 1; dispenso::ThreadPool pool(num_threads); startRusage(); for (auto UNUSED_VAR : state) { dispenso::TaskSet tasks(pool); tasks.schedule([]() {}); std::this_thread::sleep_for(100ms); tasks.schedule([]() {}); // TaskSet destructor waits for both tasks, mirroring TBB's g.wait() } endRusage(state); } static void CustomArguments(benchmark::internal::Benchmark* b) { for (int j : {kSmallSize, kMediumSize, kLargeSize}) { for (int s : pow2HalfStepThreads()) { b->Args({s, j}); } } } static void CustomArgumentsVeryIdle(benchmark::internal::Benchmark* b) { for (int s : pow2HalfStepThreads()) { b->Args({s}); } } #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb_mostly_idle)->Apply(CustomArguments)->Unit(benchmark::kMicrosecond)->UseRealTime(); BENCHMARK(BM_tbb_very_idle) ->Apply(CustomArgumentsVeryIdle) ->Unit(benchmark::kMicrosecond) ->UseRealTime(); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso_mostly_idle) ->Apply(CustomArguments) ->Unit(benchmark::kMicrosecond) ->UseRealTime(); BENCHMARK(BM_dispenso_very_idle) ->Apply(CustomArgumentsVeryIdle) ->Unit(benchmark::kMicrosecond) ->UseRealTime(); BENCHMARK_MAIN(); ================================================ FILE: benchmarks/locality_benchmark.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * Benchmark for cache/NUMA-sensitive workloads. * * Tests a repeated stencil pattern over large arrays (exceeding L3 cache) * where deterministic thread-to-memory mapping (kStatic) may outperform * dynamic chunking (kAuto) due to cache locality and NUMA effects. * * The work per element is intentionally trivial (memory-bound) so that * memory access patterns dominate performance, not compute. */ // MSVC uses __restrict, GCC/Clang use __restrict__ #ifdef _MSC_VER #define RESTRICT __restrict #else #define RESTRICT __restrict__ #endif #include #include #include #include #if defined(_OPENMP) #include #endif #if !defined(BENCHMARK_WITHOUT_TBB) #include "tbb/blocked_range.h" #include "tbb/parallel_for.h" #include "tbb_compat.h" #endif // !BENCHMARK_WITHOUT_TBB #include "thread_benchmark_common.h" // ~32MB per array (4M doubles). Two arrays = 64MB, exceeds typical per-core L2 // but may fit in shared L3 on some machines. Scale up if needed. static constexpr size_t kSmallSize = 100000; // ~800KB per array static constexpr size_t kMediumSize = 4000000; // ~32MB per array static constexpr size_t kLargeSize = 32000000; // ~256MB per array static constexpr int kPasses = 10; // Initialize arrays with deterministic data. static void initArrays(std::vector& input, std::vector& output, size_t n) { input.resize(n); output.resize(n); for (size_t i = 0; i < n; ++i) { input[i] = static_cast(i % 1000) * 0.001; } std::memset(output.data(), 0, n * sizeof(double)); } // Simple 3-point stencil: output[i] = 0.25 * (input[i-1] + 2*input[i] + input[i+1]) // Cheap compute, memory-bound. Multiple passes amplify locality effects. inline void stencilPass(const double* RESTRICT src, double* RESTRICT dst, size_t begin, size_t end, size_t n) { for (size_t i = begin; i < end; ++i) { size_t im = (i > 0) ? i - 1 : 0; size_t ip = (i < n - 1) ? 
i + 1 : n - 1; dst[i] = 0.25 * (src[im] + 2.0 * src[i] + src[ip]); } } // Verify output is non-garbage (spot-check a few values). static void checkOutput(const double* data, size_t n) { if (n == 0) return; // After stencil passes, values should be finite and in a reasonable range. for (size_t i = 0; i < n; i += n / 10 + 1) { if (!std::isfinite(data[i])) { std::cerr << "FAIL: non-finite value at index " << i << std::endl; abort(); } } } template void BM_serial(benchmark::State& state) { std::vector input, output; initArrays(input, output, num_elements); for (auto UNUSED_VAR : state) { for (int pass = 0; pass < kPasses; ++pass) { stencilPass(input.data(), output.data(), 0, num_elements, num_elements); std::swap(input, output); } } checkOutput(input.data(), num_elements); } void BM_dispenso_static(benchmark::State& state) { const int num_threads = state.range(0) - 1; const size_t num_elements = state.range(1); std::vector input, output; initArrays(input, output, num_elements); dispenso::ThreadPool pool(num_threads); for (auto UNUSED_VAR : state) { for (int pass = 0; pass < kPasses; ++pass) { dispenso::TaskSet tasks(pool); const double* src = input.data(); double* dst = output.data(); auto range = dispenso::makeChunkedRange(size_t{0}, num_elements, dispenso::ParForChunking::kStatic); dispenso::parallel_for(tasks, range, [src, dst, num_elements](size_t begin, size_t end) { stencilPass(src, dst, begin, end, num_elements); }); std::swap(input, output); } } checkOutput(input.data(), num_elements); } void BM_dispenso_auto(benchmark::State& state) { const int num_threads = state.range(0) - 1; const size_t num_elements = state.range(1); std::vector input, output; initArrays(input, output, num_elements); dispenso::ThreadPool pool(num_threads); for (auto UNUSED_VAR : state) { for (int pass = 0; pass < kPasses; ++pass) { dispenso::TaskSet tasks(pool); const double* src = input.data(); double* dst = output.data(); auto range = dispenso::makeChunkedRange(size_t{0}, num_elements, dispenso::ParForChunking::kAuto); dispenso::parallel_for(tasks, range, [src, dst, num_elements](size_t begin, size_t end) { stencilPass(src, dst, begin, end, num_elements); }); std::swap(input, output); } } checkOutput(input.data(), num_elements); } #if defined(_OPENMP) void BM_omp(benchmark::State& state) { const int num_threads = state.range(0); const size_t num_elements = state.range(1); std::vector input, output; initArrays(input, output, num_elements); omp_set_num_threads(num_threads); for (auto UNUSED_VAR : state) { for (int pass = 0; pass < kPasses; ++pass) { const double* src = input.data(); double* dst = output.data(); #pragma omp parallel for schedule(static) for (int64_t i = 0; i < static_cast(num_elements); ++i) { size_t im = (i > 0) ? i - 1 : 0; size_t ip = (i < num_elements - 1) ? 
i + 1 : num_elements - 1; dst[i] = 0.25 * (src[im] + 2.0 * src[i] + src[ip]); } std::swap(input, output); } } checkOutput(input.data(), num_elements); } #endif /* defined(_OPENMP) */ #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb(benchmark::State& state) { const int num_threads = state.range(0); const size_t num_elements = state.range(1); std::vector input, output; initArrays(input, output, num_elements); for (auto UNUSED_VAR : state) { tbb_compat::task_scheduler_init initsched(num_threads); for (int pass = 0; pass < kPasses; ++pass) { const double* src = input.data(); double* dst = output.data(); tbb::parallel_for( tbb::blocked_range(0, num_elements), [src, dst, num_elements](const tbb::blocked_range& r) { stencilPass(src, dst, r.begin(), r.end(), num_elements); }); std::swap(input, output); } } checkOutput(input.data(), num_elements); } #endif // !BENCHMARK_WITHOUT_TBB static void CustomArguments(benchmark::internal::Benchmark* b) { for (size_t j : {kSmallSize, kMediumSize, kLargeSize}) { for (int i : {1, 4, 16, 64, 128}) { b->Args({i, static_cast(j)}); } } } BENCHMARK_TEMPLATE(BM_serial, kSmallSize); BENCHMARK_TEMPLATE(BM_serial, kMediumSize); BENCHMARK_TEMPLATE(BM_serial, kLargeSize); #if defined(_OPENMP) BENCHMARK(BM_omp)->Apply(CustomArguments)->UseRealTime(); #endif #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb)->Apply(CustomArguments)->UseRealTime(); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso_static)->Apply(CustomArguments)->UseRealTime(); BENCHMARK(BM_dispenso_auto)->Apply(CustomArguments)->UseRealTime(); BENCHMARK_MAIN(); ================================================ FILE: benchmarks/nested_for_benchmark.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include #include #if defined(_OPENMP) #include #endif #if !defined(BENCHMARK_WITHOUT_TBB) #include "tbb/blocked_range.h" #include "tbb/parallel_reduce.h" #include "tbb_compat.h" #endif // !BENCHMARK_WITHOUT_TBB #include "thread_benchmark_common.h" namespace { constexpr int kWorkMultiplier = 4; constexpr int kSmallSize = 10; constexpr int kMediumSize = 500; constexpr int kLargeSize = 2500; int g_numThreads = 0; } // namespace uint32_t getInputs(int numElements) { srand(numElements); return rand() & 127; } inline uint64_t calculate(uint64_t input, uint64_t index, size_t foo) { return std::cos( std::log( std::sin(std::exp(std::sqrt(static_cast((input ^ index) - 3 * foo * input)))))); } uint64_t calculateInnerSerial(uint64_t input, size_t foo, int numElements) { uint64_t sum = 0; for (size_t i = 0; i < kWorkMultiplier * numElements; ++i) { sum += calculate(input, i, foo); } return sum; } void checkResults(uint32_t input, uint64_t actual, int foo, size_t numElements) { if (!foo) return; if (input != getInputs(numElements)) { std::cerr << "Failed to recover input!" << std::endl; abort(); } uint64_t expected = 0; for (size_t i = 0; i < numElements; ++i) { expected += calculateInnerSerial(input, foo, numElements); } if (expected != actual) { std::cerr << "FAIL! 
" << expected << " vs " << actual << std::endl; abort(); } } template void BM_serial(benchmark::State& state) { auto input = getInputs(numElements); uint64_t sum = 0; int foo = 0; for (auto UNUSED_VAR : state) { sum = 0; ++foo; for (size_t j = 0; j < numElements; ++j) { sum += calculateInnerSerial(input, foo, numElements); } } checkResults(input, sum, foo, numElements); } uint64_t calculateInnerDispenso(uint64_t input, size_t foo, int numElements) { std::vector sums; sums.reserve(g_numThreads + 1); dispenso::parallel_for( sums, []() { return uint64_t{0}; }, 0, kWorkMultiplier * numElements, [input, foo](uint64_t& lsumStore, size_t i, size_t end) { uint64_t lsum = 0; for (; i != end; ++i) { lsum += calculate(input, i, foo); } lsumStore += lsum; }); uint64_t sum = 0; for (auto s : sums) { sum += s; } return sum; } void BM_dispenso(benchmark::State& state) { g_numThreads = state.range(0) - 1; const int numElements = state.range(1); dispenso::resizeGlobalThreadPool(g_numThreads); uint64_t sum = 0; int foo = 0; auto input = getInputs(numElements); for (auto UNUSED_VAR : state) { std::vector sums; sums.reserve(g_numThreads + 1); ++foo; dispenso::parallel_for( sums, []() { return uint64_t{0}; }, 0, numElements, [numElements, input, foo](uint64_t& lsumStore, size_t j, size_t end) { uint64_t lsum = 0; for (; j != end; ++j) { lsum += calculateInnerDispenso(input, foo, numElements); } lsumStore += lsum; }); sum = 0; for (auto s : sums) { sum += s; } } checkResults(input, sum, foo, numElements); } uint64_t calculateInnerDispensoAuto(uint64_t input, size_t foo, int numElements) { std::vector sums; sums.reserve(g_numThreads + 1); dispenso::ParForOptions options; options.defaultChunking = dispenso::ParForChunking::kAuto; dispenso::parallel_for( sums, []() { return uint64_t{0}; }, 0, kWorkMultiplier * numElements, [input, foo](uint64_t& lsumStore, size_t i, size_t end) { uint64_t lsum = 0; for (; i != end; ++i) { lsum += calculate(input, i, foo); } lsumStore += lsum; }, options); uint64_t sum = 0; for (auto s : sums) { sum += s; } return sum; } void BM_dispenso_auto(benchmark::State& state) { g_numThreads = state.range(0) - 1; const int numElements = state.range(1); dispenso::resizeGlobalThreadPool(g_numThreads); uint64_t sum = 0; int foo = 0; dispenso::ParForOptions options; options.defaultChunking = dispenso::ParForChunking::kAuto; auto input = getInputs(numElements); for (auto UNUSED_VAR : state) { std::vector sums; sums.reserve(g_numThreads + 1); ++foo; dispenso::parallel_for( sums, []() { return uint64_t{0}; }, 0, numElements, [numElements, input, foo](uint64_t& lsumStore, size_t j, size_t end) { uint64_t lsum = 0; for (; j != end; ++j) { lsum += calculateInnerDispensoAuto(input, foo, numElements); } lsumStore += lsum; }, options); sum = 0; for (auto s : sums) { sum += s; } } checkResults(input, sum, foo, numElements); } #if defined(_OPENMP) uint64_t calculateInnerOmp(uint64_t input, size_t foo, int numElements) { uint64_t sum = 0; #pragma omp parallel for reduction(+ : sum) for (int i = 0; i < kWorkMultiplier * numElements; ++i) { sum += calculate(input, i, foo); } return sum; } void BM_omp(benchmark::State& state) { g_numThreads = state.range(0); const int numElements = state.range(1); omp_set_num_threads(g_numThreads); uint64_t sum = 0; int foo = 0; auto input = getInputs(numElements); for (auto UNUSED_VAR : state) { sum = 0; ++foo; #pragma omp parallel for reduction(+ : sum) for (int i = 0; i < numElements; ++i) { sum += calculateInnerOmp(input, foo, numElements); } } checkResults(input, sum, 
foo, numElements); } #endif /*defined(_OPENMP)*/ #if !defined(BENCHMARK_WITHOUT_TBB) uint64_t calculateInnerTbb(uint64_t input, size_t foo, int numElements) { return tbb::parallel_reduce( tbb::blocked_range(0, kWorkMultiplier * numElements), uint64_t{0}, [input, foo](const tbb::blocked_range& r, uint64_t init) -> uint64_t { for (size_t a = r.begin(); a != r.end(); ++a) init += calculate(input, a, foo); return init; }, [](uint64_t x, uint64_t y) -> uint64_t { return x + y; }); } void BM_tbb(benchmark::State& state) { g_numThreads = state.range(0); const int numElements = state.range(1); uint64_t sum = 0; int foo = 0; auto input = getInputs(numElements); for (auto UNUSED_VAR : state) { tbb_compat::task_scheduler_init initsched(g_numThreads); ++foo; sum = tbb::parallel_reduce( tbb::blocked_range(0, numElements), uint64_t{0}, [numElements, input, foo](const tbb::blocked_range& r, uint64_t init) -> uint64_t { for (size_t a = r.begin(); a != r.end(); ++a) init += calculateInnerTbb(input, foo, numElements); return init; }, [](uint64_t x, uint64_t y) -> uint64_t { return x + y; }); } checkResults(input, sum, foo, numElements); } #endif // !BENCHMARK_WITHOUT_TBB static void CustomArguments(benchmark::internal::Benchmark* b) { for (int j : {kSmallSize, kMediumSize, kLargeSize}) { for (int i : pow2HalfStepThreads()) { b->Args({i, j}); } } } BENCHMARK_TEMPLATE(BM_serial, kSmallSize); BENCHMARK_TEMPLATE(BM_serial, kMediumSize); BENCHMARK_TEMPLATE(BM_serial, kLargeSize); #if defined(_OPENMP) BENCHMARK(BM_omp)->Apply(CustomArguments)->UseRealTime(); #endif #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb)->Apply(CustomArguments)->UseRealTime(); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso)->Apply(CustomArguments)->UseRealTime(); BENCHMARK(BM_dispenso_auto)->Apply(CustomArguments)->UseRealTime(); BENCHMARK_MAIN(); ================================================ FILE: benchmarks/nested_pool_benchmark.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * Benchmarks for nested/hierarchical task scheduling patterns. * Tests scenarios where tasks spawn additional child tasks. 
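 *
 * A minimal sketch of the nested pattern being measured, assuming the
 * dispenso::TaskSet API used by the benchmarks below (illustrative only):
 *
 *   dispenso::ThreadPool pool(8);
 *   dispenso::TaskSet outer(pool);
 *   outer.schedule([&pool]() {
 *     dispenso::TaskSet inner(pool);           // child tasks spawned from within a task
 *     inner.schedule([]() { doLeafWork(); });  // doLeafWork() is a placeholder
 *   });
 *   outer.wait();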
*/ #include #include #if !defined(BENCHMARK_WITHOUT_TBB) #include #include "tbb_compat.h" #endif // !BENCHMARK_WITHOUT_TBB #if !defined(BENCHMARK_WITHOUT_FOLLY) #include #include #include #include #include #include #endif // !BENCHMARK_WITHOUT_FOLLY #include "thread_benchmark_common.h" static constexpr int kSmallSize = 1000; static constexpr int kMediumSize = 10000; static constexpr int kLargeSize = 300000; struct alignas(64) Work { size_t count = 0; void operator+=(size_t o) { count += o; } }; Work g_work[1025]; std::atomic g_tCounter{0}; inline int testTid() { static DISPENSO_THREAD_LOCAL int t = -1; if (t < 0) { t = g_tCounter.fetch_add(1, std::memory_order_acq_rel); } return t; } inline Work& work() { static DISPENSO_THREAD_LOCAL Work* w = nullptr; if (!w) { if (testTid() == 0) { w = g_work + 1024; } else { w = g_work + (testTid() & 1023); } } return *w; } void BM_dispenso(benchmark::State& state) { const int num_threads = state.range(0) - 1; const int num_elements = state.range(1); dispenso::ThreadPool pool(num_threads); for (auto UNUSED_VAR : state) { dispenso::TaskSet outerTasks(pool); for (int i = 0; i < num_elements; ++i) { outerTasks.schedule([&pool, num_elements]() { int num = std::sqrt(num_elements); dispenso::TaskSet tasks(pool); for (int j = 0; j < num; ++j) { tasks.schedule([j]() { work() += j; }); } }); } } } void BM_dispenso_bulk(benchmark::State& state) { const int num_threads = state.range(0) - 1; const int num_elements = state.range(1); dispenso::ThreadPool pool(num_threads); for (auto UNUSED_VAR : state) { dispenso::TaskSet outerTasks(pool); outerTasks.scheduleBulk(static_cast(num_elements), [&pool, num_elements](size_t) { return [&pool, num_elements]() { int num = std::sqrt(num_elements); dispenso::TaskSet tasks(pool); tasks.scheduleBulk( static_cast(num), [](size_t j) { return [j]() { work() += j; }; }); }; }); } } #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb(benchmark::State& state) { const int num_threads = state.range(0); const int num_elements = state.range(1); tbb_compat::task_scheduler_init initsched(num_threads); for (auto UNUSED_VAR : state) { tbb::task_group g; for (int i = 0; i < num_elements; ++i) { g.run([num_elements]() { int num = std::sqrt(num_elements); tbb::task_group g2; for (int j = 0; j < num; ++j) { g2.run([j]() { work() += j; }); } g2.wait(); }); } g.wait(); } } #endif // !BENCHMARK_WITHOUT_TBB #if !defined(BENCHMARK_WITHOUT_FOLLY) void BM_folly(benchmark::State& state) { const int num_threads = state.range(0); const int num_elements = state.range(1); if (num_elements > 10000) { state.SkipWithError("We run out of memory here with too many elements"); } folly::CPUThreadPoolExecutor follyExec(num_threads); for (auto UNUSED_VAR : state) { folly::coro::blockingWait([&]() -> folly::coro::Task { std::vector> tasks; for (int i = 0; i < num_elements; ++i) { tasks.push_back(folly::coro::co_invoke([num_elements]() -> folly::coro::Task { co_await folly::coro::co_reschedule_on_current_executor; std::vector> tasks2; int num = std::sqrt(num_elements); for (int j = 0; j < num; ++j) { tasks2.push_back(folly::coro::co_invoke([j]() -> folly::coro::Task { co_await folly::coro::co_reschedule_on_current_executor; work() += j; })); } co_await folly::coro::collectAllRange(std::move(tasks2)); })); } co_await folly::coro::collectAllRange(std::move(tasks)).scheduleOn(&follyExec); }()); } } #endif // !BENCHMARK_WITHOUT_FOLLY static void CustomArguments(benchmark::internal::Benchmark* b) { for (int j : {kSmallSize, kMediumSize, kLargeSize}) { for (int s : 
pow2HalfStepThreads()) { b->Args({s, j}); } } } #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb)->Apply(CustomArguments)->Unit(benchmark::kMicrosecond)->UseRealTime(); #endif // !BENCHMARK_WITHOUT_TBB #if !defined(BENCHMARK_WITHOUT_FOLLY) BENCHMARK(BM_folly)->Apply(CustomArguments)->Unit(benchmark::kMicrosecond)->UseRealTime(); #endif // !BENCHMARK_WITHOUT_FOLLY BENCHMARK(BM_dispenso)->Apply(CustomArguments)->Unit(benchmark::kMicrosecond)->UseRealTime(); BENCHMARK(BM_dispenso_bulk)->Apply(CustomArguments)->Unit(benchmark::kMicrosecond)->UseRealTime(); BENCHMARK_MAIN(); ================================================ FILE: benchmarks/once_function_benchmark.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include #include #include #include #include "benchmark_common.h" constexpr size_t kSmallSize = 24; constexpr size_t kMediumSize = 120; constexpr size_t kLargeSize = 248; // 1000 is larger than our largest optimized chunk size, so we may expect to see performance falloff // here. constexpr size_t kExtraLargeSize = 1000; template void runMoveLoop(benchmark::State& state, Func f) { for (auto UNUSED_VAR : state) { ExeType t(f); ExeType o; for (int i = 0; i < 10; ++i) { o = std::move(t); t = std::move(o); } t(); } } template class FuncConsumer { public: void add(Func&& f) { funcs_.emplace_back(std::move(f)); } void consumeAll() { while (!funcs_.empty()) { Func f = std::move(funcs_.front()); funcs_.pop_front(); f(); } } private: std::deque funcs_; }; template struct Foo { Foo() { buf[0] = 0; benchmark::ClobberMemory(); } Foo(Foo&& f) { std::memcpy(buf, f.buf, kSize); } Foo(const Foo& f) { std::memcpy(buf, f.buf, kSize); } void operator()() { benchmark::DoNotOptimize(++buf[0]); } uint32_t buf[kSize / 4]; }; template void onceCall(F&& f) { F lf = std::move(f); lf(); } template void BM_move_std_function(benchmark::State& state) { runMoveLoop>(state, Foo()); } template void BM_move_once_function(benchmark::State& state) { runMoveLoop(state, Foo()); } constexpr int kMediumLoopLen = 200; template void BM_queue_inline_function(benchmark::State& state) { FuncConsumer> consumer; for (auto UNUSED_VAR : state) { for (int i = 0; i < kMediumLoopLen; ++i) { consumer.add(Foo()); } consumer.consumeAll(); } } template void BM_queue_std_function(benchmark::State& state) { FuncConsumer> consumer; for (auto UNUSED_VAR : state) { for (int i = 0; i < kMediumLoopLen; ++i) { consumer.add(Foo()); } consumer.consumeAll(); } } template void BM_queue_once_function(benchmark::State& state) { FuncConsumer consumer; for (auto UNUSED_VAR : state) { for (int i = 0; i < kMediumLoopLen; ++i) { consumer.add(Foo()); } consumer.consumeAll(); } } BENCHMARK_TEMPLATE(BM_move_std_function, kSmallSize); BENCHMARK_TEMPLATE(BM_move_once_function, kSmallSize); BENCHMARK_TEMPLATE(BM_move_std_function, kMediumSize); BENCHMARK_TEMPLATE(BM_move_once_function, kMediumSize); BENCHMARK_TEMPLATE(BM_move_std_function, kLargeSize); BENCHMARK_TEMPLATE(BM_move_once_function, kLargeSize); BENCHMARK_TEMPLATE(BM_move_std_function, kExtraLargeSize); BENCHMARK_TEMPLATE(BM_move_once_function, kExtraLargeSize); BENCHMARK_TEMPLATE(BM_queue_inline_function, kSmallSize); BENCHMARK_TEMPLATE(BM_queue_std_function, kSmallSize); BENCHMARK_TEMPLATE(BM_queue_once_function, kSmallSize); BENCHMARK_TEMPLATE(BM_queue_inline_function, kMediumSize); 
BENCHMARK_TEMPLATE(BM_queue_std_function, kMediumSize); BENCHMARK_TEMPLATE(BM_queue_once_function, kMediumSize); BENCHMARK_TEMPLATE(BM_queue_inline_function, kLargeSize); BENCHMARK_TEMPLATE(BM_queue_std_function, kLargeSize); BENCHMARK_TEMPLATE(BM_queue_once_function, kLargeSize); BENCHMARK_TEMPLATE(BM_queue_inline_function, kExtraLargeSize); BENCHMARK_TEMPLATE(BM_queue_std_function, kExtraLargeSize); BENCHMARK_TEMPLATE(BM_queue_once_function, kExtraLargeSize); BENCHMARK_MAIN(); ================================================ FILE: benchmarks/pipeline_benchmark.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include #include #include #include #include #include #include "benchmark_common.h" #if !defined(BENCHMARK_WITHOUT_TBB) #include "tbb_compat.h" #endif // !BENCHMARK_WITHOUT_TBB #include #if TF_VERSION > 300000 #include #endif // TF_VERSION // (1) Generate images // (2) Calculate geometric mean // (3) Tonemap based on geometric mean constexpr size_t kWidth = 256; constexpr size_t kHeight = 256; constexpr size_t kNumImages = 500; constexpr size_t kSeed = 55; struct Work { Work(size_t idx) : index(idx) {} Work(Work&& w) : index(w.index), geometricMean(w.geometricMean), inputImage(std::move(w.inputImage)) {} Work& operator=(Work&& w) { index = w.index; geometricMean = w.geometricMean; inputImage = std::move(w.inputImage); return *this; } size_t index; double geometricMean = 0; std::unique_ptr inputImage; }; std::vector> g_serialResults; Work fillImage(Work work) { std::mt19937 rng(kSeed + work.index); work.inputImage = std::make_unique(kWidth * kHeight); std::uniform_int_distribution dist; for (size_t i = 0; i < kWidth * kHeight; ++i) { work.inputImage[i] = dist(rng); } return work; } Work computeGeometricMean(Work work) { double sum = 0; for (size_t i = 0; i < kWidth * kHeight; ++i) { sum += std::log(1.0e-10 + work.inputImage[i]); } work.geometricMean = std::exp(sum / (kWidth * kHeight)); return work; } std::unique_ptr tonemap(Work work) { auto out = std::make_unique(kWidth * kHeight); for (size_t i = 0; i < kWidth * kHeight; ++i) { double adjLum = work.inputImage[i] / work.geometricMean; out[i] = 255 * adjLum / (1.0 + adjLum); } return out; } void runSerial() { g_serialResults.resize(kNumImages); size_t index = 0; for (auto& out : g_serialResults) { out = tonemap(computeGeometricMean(fillImage(Work(index++)))); } } void checkResults(const std::vector>& results) { if (g_serialResults.empty()) { runSerial(); } if (g_serialResults.size() != results.size()) { std::cerr << "Number of results don't match" << std::endl; std::abort(); } for (size_t i = 0; i < results.size(); ++i) { if (std::memcmp( g_serialResults[i].get(), results[i].get(), kWidth * kHeight * sizeof(uint8_t))) { std::cerr << "Mismatch in results" << std::endl; for (size_t j = 0; j < 10; ++j) { std::cerr << (int)g_serialResults[i][j] << " vs " << (int)results[i][j] << std::endl; } std::abort(); } } } void BM_serial(benchmark::State& state) { for (auto UNUSED_VAR : state) { runSerial(); } } void runDispenso(std::vector>& results) { results.resize(kNumImages); size_t counter = 0; dispenso::pipeline( [&counter]() -> dispenso::OpResult { if (counter < kNumImages) { return fillImage(Work(counter++)); } return {}; }, computeGeometricMean, [&results](Work work) { size_t index = work.index; results[index] = tonemap(std::move(work)); }); } void 
BM_dispenso(benchmark::State& state) { std::vector> results; (void)dispenso::globalThreadPool(); for (auto UNUSED_VAR : state) { runDispenso(results); } checkResults(results); } void runDispensoPar(std::vector>& results) { results.resize(kNumImages); std::atomic counter(0); dispenso::pipeline( dispenso::stage( [&counter]() -> dispenso::OpResult { size_t curIndex = counter.fetch_add(1, std::memory_order_acquire); if (curIndex < kNumImages) { return fillImage(Work(curIndex)); } return {}; }, dispenso::kStageNoLimit), dispenso::stage(computeGeometricMean, dispenso::kStageNoLimit), dispenso::stage( [&results](Work work) { size_t index = work.index; results[index] = tonemap(std::move(work)); }, dispenso::kStageNoLimit)); } void BM_dispenso_par(benchmark::State& state) { std::vector> results; (void)dispenso::globalThreadPool(); for (auto UNUSED_VAR : state) { runDispensoPar(results); } checkResults(results); } #if !defined(BENCHMARK_WITHOUT_TBB) void runTBB(std::vector>& results) { results.resize(kNumImages); size_t counter = 0; tbb::parallel_pipeline( /*max_number_of_live_token=*/std::thread::hardware_concurrency(), tbb::make_filter( tbb_compat::filter::serial, [&counter](tbb::flow_control& fc) -> Work* { if (counter < kNumImages) { return new Work(fillImage(Work(counter++))); } fc.stop(); return nullptr; }) & tbb::make_filter( tbb_compat::filter::serial, [](Work* workIn) { Work& work = *workIn; work = computeGeometricMean(std::move(work)); return workIn; }) & tbb::make_filter(tbb_compat::filter::serial, [&results](Work* workIn) { size_t index = workIn->index; results[index] = tonemap(std::move(*workIn)); delete workIn; })); } void BM_tbb(benchmark::State& state) { std::vector> results; for (auto UNUSED_VAR : state) { runTBB(results); } checkResults(results); } void runTBBPar(std::vector>& results) { results.resize(kNumImages); std::atomic counter(0); tbb::parallel_pipeline( /*max_number_of_live_token=*/std::thread::hardware_concurrency(), tbb::make_filter( tbb_compat::filter::parallel, [&counter](tbb::flow_control& fc) -> Work* { size_t curIndex = counter.fetch_add(1, std::memory_order_acquire); if (curIndex < kNumImages) { return new Work(fillImage(Work(curIndex))); } fc.stop(); return nullptr; }) & tbb::make_filter( tbb_compat::filter::parallel, [](Work* work) { *work = computeGeometricMean(std::move(*work)); return work; }) & tbb::make_filter(tbb_compat::filter::parallel, [&results](Work* work) { size_t index = work->index; results[index] = tonemap(std::move(*work)); delete work; })); } void BM_tbb_par(benchmark::State& state) { std::vector> results; for (auto UNUSED_VAR : state) { runTBBPar(results); } checkResults(results); } #endif // !BENCHMARK_WITHOUT_TBB #if TF_VERSION > 300000 void runTaskflow(std::vector>& results, tf::Executor& exec) { results.resize(kNumImages); std::vector> work; // Ensure we don't resize underlying buffer causing data races work.reserve(kNumImages); tf::Taskflow taskflow; size_t counter2 = 0; size_t counter3 = 0; tf::Pipeline pl( std::thread::hardware_concurrency(), tf::Pipe{ tf::PipeType::SERIAL, [&work](auto& pf) mutable { if (work.size() < kNumImages) { work.push_back(std::make_unique(fillImage(Work(work.size())))); } else { pf.stop(); } }}, tf::Pipe{ tf::PipeType::SERIAL, [&counter2, &work](auto& pf) mutable { Work& w = *work[counter2++]; w = computeGeometricMean(std::move(w)); }}, tf::Pipe{tf::PipeType::SERIAL, [&counter3, &work, &results](auto& pf) mutable { Work& w = *work[counter3]; results[counter3++] = tonemap(std::move(w)); }}); 
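  // Compose the pipeline module into the taskflow, then run it to completion on the executor.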
taskflow.composed_of(pl); exec.run(taskflow).wait(); } void BM_taskflow(benchmark::State& state) { std::vector> results; tf::Executor executor(std::thread::hardware_concurrency()); for (auto UNUSED_VAR : state) { runTaskflow(results, executor); } checkResults(results); } void runTaskflowPar(std::vector>& results, tf::Executor& exec) { results.resize(kNumImages); std::vector> work(kNumImages); tf::Taskflow taskflow; tf::Pipeline pl( std::thread::hardware_concurrency(), tf::Pipe{ tf::PipeType::SERIAL, // First pipe must be serial in taskflow [&work](tf::Pipeflow& pf) { if (pf.token() >= kNumImages) { pf.stop(); return; } work[pf.token()] = std::make_unique(fillImage(Work(pf.token()))); }}, tf::Pipe{ tf::PipeType::PARALLEL, [&work](tf::Pipeflow& pf) { Work& w = *work[pf.token()]; w = computeGeometricMean(std::move(w)); }}, tf::Pipe{tf::PipeType::PARALLEL, [&work, &results](tf::Pipeflow& pf) { size_t token = pf.token(); results[token] = tonemap(std::move(*work[token])); }}); taskflow.composed_of(pl); exec.run(taskflow).wait(); } void BM_taskflow_par(benchmark::State& state) { std::vector> results; tf::Executor executor(std::thread::hardware_concurrency()); for (auto UNUSED_VAR : state) { runTaskflowPar(results, executor); } checkResults(results); } #endif // TF_VERSION > 300000 BENCHMARK(BM_serial)->UseRealTime(); BENCHMARK(BM_dispenso)->UseRealTime(); #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb)->UseRealTime(); #endif // !BENCHMARK_WITHOUT_TBB #if TF_VERSION > 300000 BENCHMARK(BM_taskflow)->UseRealTime(); #endif // TF_VERSION > 300000 BENCHMARK(BM_dispenso_par)->UseRealTime(); #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb_par)->UseRealTime(); #endif // !BENCHMARK_WITHOUT_TBB #if TF_VERSION > 300000 BENCHMARK(BM_taskflow_par)->UseRealTime(); #endif // TF_VERSION > 300000 BENCHMARK_MAIN(); ================================================ FILE: benchmarks/pool_allocator_benchmark.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
*/ #include #include #include #include "benchmark_common.h" constexpr size_t kSmallSize = 1024; constexpr size_t kMediumSize = 8192; constexpr size_t kLargeSize = 65536; template void run(benchmark::State& state, Alloc alloc, Free dealloc) { std::vector ptrs(state.range(0)); for (auto UNUSED_VAR : state) { for (char*& p : ptrs) { p = alloc(); } for (char* p : ptrs) { dealloc(p); } } } template void runArena(benchmark::State& state, PoolAlloc& allocator) { std::vector ptrs(state.range(0)); for (auto UNUSED_VAR : state) { for (char*& p : ptrs) { p = allocator.alloc(); } allocator.clear(); } } template void BM_mallocfree(benchmark::State& state) { run( state, []() { return reinterpret_cast(::malloc(kSize)); }, [](char* buf) { ::free(buf); }); } template void BM_pool_allocator(benchmark::State& state) { dispenso::PoolAllocator allocator(kSize, kSize * 32, ::malloc, ::free); run( state, [&allocator]() { return allocator.alloc(); }, [&allocator](char* buf) { allocator.dealloc(buf); }); } template void BM_nl_pool_allocator(benchmark::State& state) { dispenso::NoLockPoolAllocator allocator(kSize, kSize * 32, ::malloc, ::free); run( state, [&allocator]() { return allocator.alloc(); }, [&allocator](char* buf) { allocator.dealloc(buf); }); } template void BM_pool_allocator_arena(benchmark::State& state) { dispenso::PoolAllocator allocator(kSize, kSize * 32, ::malloc, ::free); runArena(state, allocator); } template void BM_nl_pool_allocator_arena(benchmark::State& state) { dispenso::NoLockPoolAllocator allocator(kSize, kSize * 32, ::malloc, ::free); runArena(state, allocator); } template void runThreaded(benchmark::State& state, Alloc alloc, Free dealloc) { dispenso::resizeGlobalThreadPool(kThreads); std::vector ptrsArray[kThreads]; for (auto& ptrs : ptrsArray) { ptrs.resize(state.range(0)); } for (auto UNUSED_VAR : state) { dispenso::TaskSet tasks(dispenso::globalThreadPool()); for (size_t i = 0; i < kThreads; ++i) { tasks.schedule([alloc, dealloc, &ptrs = ptrsArray[i]]() { for (char*& p : ptrs) { p = alloc(); } for (char* p : ptrs) { dealloc(p); } }); } } } template void BM_mallocfree_threaded(benchmark::State& state) { runThreaded( state, []() { return reinterpret_cast(::malloc(kSize)); }, [](char* buf) { ::free(buf); }); } template void BM_pool_allocator_threaded(benchmark::State& state) { dispenso::PoolAllocator allocator(kSize, (1 << 20), ::malloc, ::free); runThreaded( state, [&allocator]() { return allocator.alloc(); }, [&allocator](char* buf) { allocator.dealloc(buf); }); } BENCHMARK_TEMPLATE(BM_mallocfree, kSmallSize)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_pool_allocator, kSmallSize)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_nl_pool_allocator, kSmallSize)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_mallocfree, kMediumSize)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_pool_allocator, kMediumSize)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_nl_pool_allocator, kMediumSize)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_mallocfree, kLargeSize)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_pool_allocator, kLargeSize)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_nl_pool_allocator, kLargeSize)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_pool_allocator_arena, kSmallSize)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_nl_pool_allocator_arena, kSmallSize)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_pool_allocator_arena, kMediumSize)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_nl_pool_allocator_arena, kMediumSize)->Range(1 << 13, 1 << 15); 
BENCHMARK_TEMPLATE(BM_pool_allocator_arena, kLargeSize)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_nl_pool_allocator_arena, kLargeSize)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE2(BM_mallocfree_threaded, kSmallSize, 2)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE2(BM_pool_allocator_threaded, kSmallSize, 2)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE2(BM_mallocfree_threaded, kMediumSize, 2)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE2(BM_pool_allocator_threaded, kMediumSize, 2)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE2(BM_mallocfree_threaded, kLargeSize, 2)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE2(BM_pool_allocator_threaded, kLargeSize, 2)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE2(BM_mallocfree_threaded, kSmallSize, 8)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE2(BM_pool_allocator_threaded, kSmallSize, 8)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE2(BM_mallocfree_threaded, kMediumSize, 8)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE2(BM_pool_allocator_threaded, kMediumSize, 8)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE2(BM_mallocfree_threaded, kLargeSize, 8)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE2(BM_pool_allocator_threaded, kLargeSize, 8)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE2(BM_mallocfree_threaded, kSmallSize, 16)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE2(BM_pool_allocator_threaded, kSmallSize, 16)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE2(BM_mallocfree_threaded, kMediumSize, 16)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE2(BM_pool_allocator_threaded, kMediumSize, 16)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE2(BM_mallocfree_threaded, kLargeSize, 16)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE2(BM_pool_allocator_threaded, kLargeSize, 16)->Range(1 << 13, 1 << 15); BENCHMARK_MAIN(); ================================================ FILE: benchmarks/run_benchmarks.py ================================================ #!/usr/bin/env python3 # Copyright (c) Meta Platforms, Inc. and affiliates. # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. """ Benchmark runner and chart generator for dispenso. This script runs the dispenso benchmarks, collects results, and generates comparison charts. Results include machine information for reproducibility. 
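A typical invocation (see the Usage summary below; flags as defined in main()) looks like:

    python run_benchmarks.py --build-dir build --benchmarks "for_benchmark" --output-dir benchmark_results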
Usage: python run_benchmarks.py [--output-dir OUTPUT] [--benchmarks PATTERN] Requirements: pip install matplotlib pandas """ import argparse import json import os import platform import re import subprocess import sys from datetime import datetime from pathlib import Path from typing import Any, Dict, List, Optional try: import matplotlib.pyplot as plt import pandas as pd HAS_PLOTTING = True except ImportError: HAS_PLOTTING = False def get_machine_info() -> Dict[str, Any]: """Gather machine information for benchmark context.""" info = { "timestamp": datetime.now().isoformat(), "platform": platform.system(), "platform_release": platform.release(), "platform_version": platform.version(), "architecture": platform.machine(), "processor": platform.processor(), "python_version": platform.python_version(), } # Try to get more detailed CPU info if platform.system() == "Linux": try: with open("/proc/cpuinfo", "r") as f: cpuinfo = f.read() # Extract model name match = re.search(r"model name\s*:\s*(.+)", cpuinfo) if match: info["cpu_model"] = match.group(1).strip() # Count cores info["cpu_cores"] = cpuinfo.count("processor\t:") # Get cache info match = re.search(r"cache size\s*:\s*(.+)", cpuinfo) if match: info["cache_size"] = match.group(1).strip() except Exception as e: info["cpu_info_error"] = str(e) # Memory info try: with open("/proc/meminfo", "r") as f: meminfo = f.read() match = re.search(r"MemTotal:\s*(\d+)", meminfo) if match: info["memory_kb"] = int(match.group(1)) info["memory_gb"] = round(int(match.group(1)) / 1024 / 1024, 1) except Exception: pass elif platform.system() == "Darwin": # macOS try: result = subprocess.run( ["sysctl", "-n", "machdep.cpu.brand_string"], capture_output=True, text=True, ) if result.returncode == 0: info["cpu_model"] = result.stdout.strip() result = subprocess.run( ["sysctl", "-n", "hw.ncpu"], capture_output=True, text=True ) if result.returncode == 0: info["cpu_cores"] = int(result.stdout.strip()) result = subprocess.run( ["sysctl", "-n", "hw.memsize"], capture_output=True, text=True ) if result.returncode == 0: mem_bytes = int(result.stdout.strip()) info["memory_gb"] = round(mem_bytes / 1024 / 1024 / 1024, 1) except Exception as e: info["cpu_info_error"] = str(e) elif platform.system() == "Windows": try: import winreg key = winreg.OpenKey( winreg.HKEY_LOCAL_MACHINE, r"HARDWARE\DESCRIPTION\System\CentralProcessor\0", ) info["cpu_model"] = winreg.QueryValueEx(key, "ProcessorNameString")[0] winreg.CloseKey(key) except Exception: pass try: result = subprocess.run( ["wmic", "computersystem", "get", "totalphysicalmemory"], capture_output=True, text=True, ) lines = result.stdout.strip().split("\n") if len(lines) > 1: mem_bytes = int(lines[1].strip()) info["memory_gb"] = round(mem_bytes / 1024 / 1024 / 1024, 1) except Exception: pass info["cpu_cores"] = os.cpu_count() return info def find_benchmarks(build_dir: Path, pattern: Optional[str] = None) -> List[Path]: """Find benchmark executables in the build directory.""" benchmarks = [] # Common locations search_paths = [ build_dir / "bin", build_dir / "benchmarks", build_dir, ] for search_path in search_paths: if not search_path.exists(): continue for f in search_path.iterdir(): if not f.is_file(): continue name = f.name if "benchmark" in name.lower(): if pattern is None or re.search(pattern, name): benchmarks.append(f) return sorted(benchmarks) def run_benchmark(benchmark_path: Path, extra_args: List[str] = None) -> Dict[str, Any]: """Run a single benchmark and return results.""" args = [str(benchmark_path), 
"--benchmark_format=json"] if extra_args: args.extend(extra_args) print(f"Running: {benchmark_path.name}...") try: result = subprocess.run( args, capture_output=True, text=True, timeout=600, # 10 minute timeout per benchmark ) if result.returncode != 0: return { "name": benchmark_path.name, "success": False, "error": result.stderr, } # Parse JSON output # google benchmark outputs JSON to stdout try: data = json.loads(result.stdout) return { "name": benchmark_path.name, "success": True, "data": data, } except json.JSONDecodeError: # Maybe it's not a google benchmark, try to parse stdout return { "name": benchmark_path.name, "success": True, "raw_output": result.stdout, } except subprocess.TimeoutExpired: return { "name": benchmark_path.name, "success": False, "error": "Timeout after 600 seconds", } except Exception as e: return { "name": benchmark_path.name, "success": False, "error": str(e), } def extract_benchmark_data(results: List[Dict]) -> pd.DataFrame: """Extract benchmark data into a DataFrame for plotting.""" rows = [] for result in results: if not result.get("success") or "data" not in result: continue data = result["data"] benchmarks = data.get("benchmarks", []) for bm in benchmarks: row = { "suite": result["name"].replace("_benchmark", ""), "name": bm.get("name", "unknown"), "real_time": bm.get("real_time", 0), "cpu_time": bm.get("cpu_time", 0), "time_unit": bm.get("time_unit", "ns"), "iterations": bm.get("iterations", 0), } # Extract any custom counters for key, value in bm.items(): if key not in row and isinstance(value, (int, float)): row[key] = value rows.append(row) return pd.DataFrame(rows) def generate_charts(df: pd.DataFrame, output_dir: Path): """Generate comparison charts from benchmark data.""" if df.empty: print("No data to plot") return output_dir.mkdir(parents=True, exist_ok=True) # Group by suite for suite, suite_df in df.groupby("suite"): fig, ax = plt.subplots(figsize=(12, 6)) # Bar chart of real_time suite_df_sorted = suite_df.sort_values("real_time") bars = ax.barh(suite_df_sorted["name"], suite_df_sorted["real_time"]) ax.set_xlabel(f"Time ({suite_df_sorted['time_unit'].iloc[0]})") ax.set_title(f"{suite} Benchmark Results") ax.grid(axis="x", alpha=0.3) # Add value labels for bar, val in zip(bars, suite_df_sorted["real_time"]): ax.text( val, bar.get_y() + bar.get_height() / 2, f" {val:.2f}", va="center", fontsize=8, ) plt.tight_layout() chart_path = output_dir / f"{suite}_chart.png" plt.savefig(chart_path, dpi=150) plt.close() print(f"Generated: {chart_path}") # Generate summary chart if multiple suites if df["suite"].nunique() > 1: # This would need custom logic based on what comparisons make sense pass def generate_markdown_report( machine_info: Dict[str, Any], results: List[Dict], df: pd.DataFrame, output_dir: Path, ): """Generate a markdown report of benchmark results.""" report_path = output_dir / "benchmark_report.md" with open(report_path, "w") as f: f.write("# Dispenso Benchmark Results\n\n") # Machine info f.write("## Machine Information\n\n") f.write(f"- **Date**: {machine_info.get('timestamp', 'unknown')}\n") f.write( f"- **Platform**: {machine_info.get('platform', 'unknown')} " f"{machine_info.get('platform_release', '')}\n" ) f.write(f"- **CPU**: {machine_info.get('cpu_model', 'unknown')}\n") f.write(f"- **Cores**: {machine_info.get('cpu_cores', 'unknown')}\n") f.write(f"- **Memory**: {machine_info.get('memory_gb', 'unknown')} GB\n") f.write("\n") # Results summary f.write("## Results Summary\n\n") successful = sum(1 for r in results if 
r.get("success")) f.write(f"- **Benchmarks run**: {len(results)}\n") f.write(f"- **Successful**: {successful}\n") f.write(f"- **Failed**: {len(results) - successful}\n\n") # Per-suite results if not df.empty: for suite in df["suite"].unique(): f.write(f"### {suite}\n\n") suite_df = df[df["suite"] == suite].sort_values("real_time") f.write("| Benchmark | Time | Unit | Iterations |\n") f.write("|-----------|------|------|------------|\n") for _, row in suite_df.iterrows(): f.write( f"| {row['name']} | {row['real_time']:.2f} | " f"{row['time_unit']} | {row['iterations']} |\n" ) f.write("\n") # Link to chart if it exists chart_path = f"{suite}_chart.png" f.write(f"![{suite} results]({chart_path})\n\n") # Failures failures = [r for r in results if not r.get("success")] if failures: f.write("## Failures\n\n") for fail in failures: f.write(f"### {fail['name']}\n\n") f.write(f"```\n{fail.get('error', 'Unknown error')}\n```\n\n") print(f"Generated report: {report_path}") def main(): parser = argparse.ArgumentParser(description="Run dispenso benchmarks") parser.add_argument( "--build-dir", "-b", type=Path, default=Path("build"), help="Build directory containing benchmark executables", ) parser.add_argument( "--output-dir", "-o", type=Path, default=Path("benchmark_results"), help="Output directory for results and charts", ) parser.add_argument( "--benchmarks", "-B", type=str, default=None, help="Regex pattern to filter benchmarks", ) parser.add_argument( "--json-only", action="store_true", help="Only output JSON, skip chart generation", ) args = parser.parse_args() # Gather machine info print("Gathering machine information...") machine_info = get_machine_info() print(f" CPU: {machine_info.get('cpu_model', 'unknown')}") print(f" Cores: {machine_info.get('cpu_cores', 'unknown')}") print(f" Memory: {machine_info.get('memory_gb', 'unknown')} GB") print() # Find benchmarks benchmarks = find_benchmarks(args.build_dir, args.benchmarks) if not benchmarks: print(f"No benchmarks found in {args.build_dir}") print("Make sure you've built with -DDISPENSO_BUILD_BENCHMARKS=ON") sys.exit(1) print(f"Found {len(benchmarks)} benchmark(s):") for b in benchmarks: print(f" - {b.name}") print() # Run benchmarks results = [] for benchmark in benchmarks: result = run_benchmark(benchmark) results.append(result) if result["success"]: print(f" ✓ {benchmark.name}") else: print(f" ✗ {benchmark.name}: {result.get('error', 'unknown error')[:50]}") # Save raw results args.output_dir.mkdir(parents=True, exist_ok=True) output_data = { "machine_info": machine_info, "results": results, } json_path = args.output_dir / "benchmark_results.json" with open(json_path, "w") as f: json.dump(output_data, f, indent=2, default=str) print(f"\nSaved raw results to: {json_path}") # Generate charts and report if not args.json_only and HAS_PLOTTING: df = extract_benchmark_data(results) if not df.empty: generate_charts(df, args.output_dir) generate_markdown_report(machine_info, results, df, args.output_dir) else: print( "No benchmark data to plot (benchmarks may not use google benchmark format)" ) elif not args.json_only and not HAS_PLOTTING: print("Warning: matplotlib/pandas not available. Charts will not be generated.") print("Install with: pip install matplotlib pandas") if __name__ == "__main__": main() ================================================ FILE: benchmarks/rw_lock_benchmark.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. 
* * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ // This benchmark relies on shared_lock from C++17 #if __cplusplus >= 201703L #include #include #include #include #include "thread_benchmark_common.h" constexpr size_t kNumValues = 1 << 20; // Precondition: Start < writePeriod. Note that this is enforced in BM_serial and BM_parallel template int64_t iterate(MtxType& mtx, std::vector& values, int start, int writePeriod) { int64_t total = 0; int w = start; for (auto& p : values) { if (w++ == writePeriod) { std::lock_guard lk(mtx); ++p; w = 0; } else { std::shared_lock lk(mtx); total += p; } } return total; } struct NopMutex { void lock() {} void unlock() {} void lock_shared() {} void unlock_shared() {} }; template void BM_serial(benchmark::State& state) { int writePeriod = state.range(0); std::vector values(kNumValues); int64_t total = 0; MutexT mtx; int start = 0; for (auto UNUSED_VAR : state) { total += iterate(mtx, values, start++, writePeriod); if (start == writePeriod) { start = 0; } } benchmark::DoNotOptimize(total); } static void CustomArgumentsSerial(benchmark::internal::Benchmark* b) { for (int j : {2, 8, 32, 128, 512}) { b->Args({j}); } } template void BM_parallel(benchmark::State& state) { int concurrency = state.range(0); int writePeriod = state.range(1); std::vector values(kNumValues); std::atomic total(0); MutexT mtx; int start = 0; dispenso::TaskSet tasks(dispenso::globalThreadPool()); for (auto UNUSED_VAR : state) { for (int c = 0; c < concurrency; ++c) { tasks.schedule([&total, start, &mtx, &values, writePeriod]() { total.fetch_add(iterate(mtx, values, start, writePeriod), std::memory_order_acq_rel); }); if (++start == writePeriod) { start = 0; } } tasks.wait(); } benchmark::DoNotOptimize(total.load(std::memory_order_acquire)); } static void CustomArgumentsParallel(benchmark::internal::Benchmark* b) { for (int j : {2, 8, 32, 128, 512}) { for (int s : {1, 2, 4, 8, 16, 32}) { if (s > static_cast(std::thread::hardware_concurrency())) { break; } b->Args({s, j}); } } } BENCHMARK_TEMPLATE(BM_serial, NopMutex)->Apply(CustomArgumentsSerial)->UseRealTime(); BENCHMARK_TEMPLATE(BM_serial, std::shared_mutex)->Apply(CustomArgumentsSerial)->UseRealTime(); BENCHMARK_TEMPLATE(BM_serial, dispenso::RWLock)->Apply(CustomArgumentsSerial)->UseRealTime(); BENCHMARK_TEMPLATE(BM_parallel, std::shared_mutex)->Apply(CustomArgumentsParallel)->UseRealTime(); BENCHMARK_TEMPLATE(BM_parallel, dispenso::RWLock)->Apply(CustomArgumentsParallel)->UseRealTime(); #endif // C++17 ================================================ FILE: benchmarks/simple_for_benchmark.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
*/ #include #if defined(_OPENMP) #include #endif #include #if !defined(BENCHMARK_WITHOUT_TBB) #include "tbb/blocked_range.h" #include "tbb/parallel_for.h" #include "tbb_compat.h" #endif // !BENCHMARK_WITHOUT_TBB #include #if TF_VERSION > 300000 #include #endif // TF_VERSION #include "thread_benchmark_common.h" static uint32_t kSeed(8); static constexpr int kSmallSize = 1000; static constexpr int kMediumSize = 1000000; static constexpr int kLargeSize = 100000000; static constexpr uint32_t kMinSizePerChunk = 10000; const std::vector& getInputs(int num_elements) { static std::unordered_map> vecs; auto it = vecs.find(num_elements); if (it != vecs.end()) { return it->second; } // No need to use a high-quality rng for this test. srand(kSeed); std::vector values; values.reserve(num_elements); for (int i = 0; i < num_elements; ++i) { values.push_back((rand() & 255) - 127); } auto res = vecs.emplace(num_elements, std::move(values)); assert(res.second); return res.first->second; } template void BM_serial(benchmark::State& state) { std::vector output(num_elements, 0); auto& input = getInputs(num_elements); for (auto UNUSED_VAR : state) { for (size_t i = 0; i < num_elements; ++i) { output[i] = input[i] * input[i] - 3 * input[i]; } } } void checkResults(const std::vector& input, const std::vector& output) { for (size_t i = 0; i < input.size(); ++i) { if (output[i] != input[i] * input[i] - 3 * input[i]) { std::cerr << "FAIL! " << output[i] << " vs " << input[i] * input[i] - 3 * input[i] << std::endl; abort(); } } } void BM_dispenso(benchmark::State& state) { const int num_threads = state.range(0) - 1; const int num_elements = state.range(1); std::vector output(num_elements, 0); dispenso::ThreadPool pool(num_threads); dispenso::ParForOptions options; options.minItemsPerChunk = kMinSizePerChunk; auto& input = getInputs(num_elements); for (auto UNUSED_VAR : state) { dispenso::TaskSet tasks(pool); dispenso::parallel_for( tasks, 0, num_elements, [&input, &output](size_t i) { output[i] = input[i] * input[i] - 3 * input[i]; }, options); } checkResults(input, output); } void BM_taskflow(benchmark::State& state) { const int num_threads = state.range(0); const int num_elements = state.range(1); std::vector output(num_elements, 0); auto& input = getInputs(num_elements); tf::Executor executor(num_threads); tf::Taskflow taskflow; for (auto UNUSED_VAR : state) { taskflow.for_each_index(0, num_elements, 1, [&input, &output](int i) { output[i] = input[i] * input[i] - 3 * input[i]; }); executor.run(taskflow).wait(); } checkResults(input, output); } void BM_dispenso_static_chunk(benchmark::State& state) { const int num_threads = state.range(0) - 1; const int num_elements = state.range(1); std::vector output(num_elements, 0); dispenso::ThreadPool pool(num_threads); dispenso::ParForOptions options; options.minItemsPerChunk = kMinSizePerChunk; auto& input = getInputs(num_elements); for (auto UNUSED_VAR : state) { dispenso::TaskSet tasks(pool); auto range = dispenso::makeChunkedRange(0, num_elements, dispenso::ParForChunking::kStatic); dispenso::parallel_for( tasks, range, [&input, &output](size_t begin, size_t end) { for (size_t i = begin; i < end; ++i) { output[i] = input[i] * input[i] - 3 * input[i]; } }, options); } checkResults(input, output); } void BM_dispenso_auto_chunk(benchmark::State& state) { const int num_threads = state.range(0) - 1; const int num_elements = state.range(1); std::vector output(num_elements, 0); dispenso::ThreadPool pool(num_threads); dispenso::ParForOptions options; options.minItemsPerChunk = 
kMinSizePerChunk; auto& input = getInputs(num_elements); for (auto UNUSED_VAR : state) { dispenso::TaskSet tasks(pool); auto range = dispenso::makeChunkedRange(0, num_elements, dispenso::ParForChunking::kAuto); dispenso::parallel_for( tasks, range, [&input, &output](size_t begin, size_t end) { for (size_t i = begin; i < end; ++i) { output[i] = input[i] * input[i] - 3 * input[i]; } }, options); } checkResults(input, output); } #if defined(_OPENMP) void BM_omp(benchmark::State& state) { const int num_threads = state.range(0); const int num_elements = state.range(1); std::vector output(num_elements, 0); omp_set_num_threads(num_threads); auto& input = getInputs(num_elements); for (auto UNUSED_VAR : state) { #pragma omp parallel for for (int i = 0; i < num_elements; ++i) { output[i] = input[i] * input[i] - 3 * input[i]; } } checkResults(input, output); } #endif /*defined(_OPENMP)*/ #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb(benchmark::State& state) { const int num_threads = state.range(0); const int num_elements = state.range(1); std::vector output(num_elements, 0); auto& input = getInputs(num_elements); for (auto UNUSED_VAR : state) { tbb_compat::task_scheduler_init initsched(num_threads); tbb::parallel_for( tbb::blocked_range(0, num_elements), [&input, &output](const tbb::blocked_range& r) { for (size_t i = r.begin(); i < r.end(); ++i) { output[i] = input[i] * input[i] - 3 * input[i]; } }); } checkResults(input, output); } #endif // !BENCHMARK_WITHOUT_TBB static void CustomArguments(benchmark::internal::Benchmark* b) { for (int j : {kSmallSize, kMediumSize, kLargeSize}) { for (int i : pow2HalfStepThreads()) { b->Args({i, j}); } } } BENCHMARK_TEMPLATE(BM_serial, kSmallSize); BENCHMARK_TEMPLATE(BM_serial, kMediumSize); BENCHMARK_TEMPLATE(BM_serial, kLargeSize); #if defined(_OPENMP) BENCHMARK(BM_omp)->Apply(CustomArguments)->UseRealTime(); #endif // OPENMP #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb)->Apply(CustomArguments)->UseRealTime(); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_taskflow)->Apply(CustomArguments)->UseRealTime(); BENCHMARK(BM_dispenso)->Apply(CustomArguments)->UseRealTime(); BENCHMARK(BM_dispenso_static_chunk)->Apply(CustomArguments)->UseRealTime(); BENCHMARK(BM_dispenso_auto_chunk)->Apply(CustomArguments)->UseRealTime(); BENCHMARK_MAIN(); ================================================ FILE: benchmarks/simple_pool_benchmark.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * Basic thread pool benchmarks comparing dispenso, TBB, and Folly. * Tests simple task scheduling throughput. 
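 *
 * A minimal sketch of the dispenso pattern being timed, assuming the TaskSet
 * API used by the benchmarks below (illustrative only):
 *
 *   dispenso::ThreadPool pool(numThreads);
 *   dispenso::TaskSet tasks(pool);
 *   for (int i = 0; i < numTasks; ++i) {
 *     tasks.schedule([i]() { consume(i); });  // consume() is a placeholder
 *   }
 *   // The TaskSet waits for outstanding tasks when it goes out of scope.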
*/ #include #if !defined(BENCHMARK_WITHOUT_TBB) #include #include "tbb_compat.h" #endif // !BENCHMARK_WITHOUT_TBB #if !defined(BENCHMARK_WITHOUT_FOLLY) #include #include #endif // !BENCHMARK_WITHOUT_FOLLY #include "thread_benchmark_common.h" static constexpr int kSmallSize = 1000; static constexpr int kMediumSize = 10000; static constexpr int kLargeSize = 1000000; struct alignas(64) Work { size_t count = 0; void operator+=(size_t o) { count += o; } }; Work g_work[1025]; std::atomic g_tCounter{0}; inline int testTid() { static DISPENSO_THREAD_LOCAL int t = -1; if (t < 0) { t = g_tCounter.fetch_add(1, std::memory_order_acq_rel); } return t; } inline Work& work() { static DISPENSO_THREAD_LOCAL Work* w = nullptr; if (!w) { if (testTid() == 0) { w = g_work + 1024; } else { w = g_work + (testTid() & 1023); } } return *w; } void BM_dispenso(benchmark::State& state) { const int num_threads = state.range(0) - 1; const int num_elements = state.range(1); dispenso::ThreadPool pool(num_threads); for (auto UNUSED_VAR : state) { dispenso::TaskSet tasks(pool); for (int i = 0; i < num_elements; ++i) { tasks.schedule([i]() { work() += i; }); } } } void BM_dispenso_bulk(benchmark::State& state) { const int num_threads = state.range(0) - 1; const int num_elements = state.range(1); dispenso::ThreadPool pool(num_threads); for (auto UNUSED_VAR : state) { dispenso::TaskSet tasks(pool); tasks.scheduleBulk( static_cast(num_elements), [](size_t i) { return [i]() { work() += i; }; }); } } #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb(benchmark::State& state) { const int num_threads = state.range(0); const int num_elements = state.range(1); tbb_compat::task_scheduler_init initsched(num_threads); for (auto UNUSED_VAR : state) { tbb::task_group g; for (int i = 0; i < num_elements; ++i) { g.run([i]() { work() += i; }); } g.wait(); } } #endif // !BENCHMARK_WITHOUT_TBB #if !defined(BENCHMARK_WITHOUT_FOLLY) void BM_folly(benchmark::State& state) { const int num_threads = state.range(0); const int num_elements = state.range(1); folly::CPUThreadPoolExecutor follyExec(num_threads); for (auto UNUSED_VAR : state) { folly::VirtualExecutor tasks(&follyExec); for (int i = 0; i < num_elements; ++i) { tasks.add([i]() { work() += i; }); } } } #endif // !BENCHMARK_WITHOUT_FOLLY static void CustomArguments(benchmark::internal::Benchmark* b) { for (int j : {kSmallSize, kMediumSize, kLargeSize}) { for (int s : pow2HalfStepThreads()) { b->Args({s, j}); } } } #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb)->Apply(CustomArguments)->Unit(benchmark::kMicrosecond)->UseRealTime(); #endif // !BENCHMARK_WITHOUT_TBB #if !defined(BENCHMARK_WITHOUT_FOLLY) BENCHMARK(BM_folly)->Apply(CustomArguments)->Unit(benchmark::kMicrosecond)->UseRealTime(); #endif // !BENCHMARK_WITHOUT_FOLLY BENCHMARK(BM_dispenso)->Apply(CustomArguments)->Unit(benchmark::kMicrosecond)->UseRealTime(); BENCHMARK(BM_dispenso_bulk)->Apply(CustomArguments)->Unit(benchmark::kMicrosecond)->UseRealTime(); BENCHMARK_MAIN(); ================================================ FILE: benchmarks/small_buffer_benchmark.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
*/ #include #include "benchmark_common.h" constexpr size_t kSmallSize = 32; constexpr size_t kMediumSize = 128; constexpr size_t kLargeSize = 256; template void run(benchmark::State& state, Alloc alloc, Free dealloc) { std::vector ptrs(state.range(0)); for (auto UNUSED_VAR : state) { for (char*& p : ptrs) { p = alloc(); } for (char* p : ptrs) { dealloc(p); } } } template void BM_newdelete(benchmark::State& state) { run(state, []() { return new char[kSize]; }, [](char* buf) { delete[] (buf); }); } template void BM_small_buffer_allocator(benchmark::State& state) { run( state, []() { return dispenso::allocSmallBuffer(); }, [](char* buf) { dispenso::deallocSmallBuffer(buf); }); } BENCHMARK_TEMPLATE(BM_newdelete, kSmallSize)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_small_buffer_allocator, kSmallSize)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_newdelete, kMediumSize)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_small_buffer_allocator, kMediumSize)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_newdelete, kLargeSize)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_small_buffer_allocator, kLargeSize)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_newdelete, kSmallSize)->Threads(16)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_small_buffer_allocator, kSmallSize)->Threads(16)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_newdelete, kMediumSize)->Threads(16)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_small_buffer_allocator, kMediumSize)->Threads(16)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_newdelete, kLargeSize)->Threads(16)->Range(1 << 13, 1 << 15); BENCHMARK_TEMPLATE(BM_small_buffer_allocator, kLargeSize)->Threads(16)->Range(1 << 13, 1 << 15); BENCHMARK_MAIN(); ================================================ FILE: benchmarks/summing_for_benchmark.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include #include #if defined(_OPENMP) #include #endif #if !defined(BENCHMARK_WITHOUT_TBB) #include "tbb/blocked_range.h" #include "tbb/parallel_reduce.h" #include "tbb_compat.h" #endif // !BENCHMARK_WITHOUT_TBB #include "thread_benchmark_common.h" static uint32_t kSeed(8); static constexpr int kSmallSize = 1000; static constexpr int kMediumSize = 1000000; static constexpr int kLargeSize = 100000000; const std::vector& getInputs(int num_elements) { static std::unordered_map> vecs; auto it = vecs.find(num_elements); if (it != vecs.end()) { return it->second; } // No need to use a high-quality rng for this test. srand(kSeed); std::vector values; values.reserve(num_elements); for (int i = 0; i < num_elements; ++i) { values.push_back((rand() & 255) - 127); } auto res = vecs.emplace(num_elements, std::move(values)); assert(res.second); return res.first->second; } void checkResults(const std::vector& inputs, int64_t actual, int foo) { int64_t expected = 0; for (auto v : inputs) { expected += v * v - 3 * foo * v; } if (expected != actual) { std::cerr << "FAIL! 
" << expected << " vs " << actual << std::endl; abort(); } } template void BM_serial(benchmark::State& state) { auto& input = getInputs(num_elements); int64_t sum = 0; int foo = 0; for (auto UNUSED_VAR : state) { sum = 0; ++foo; for (size_t i = 0; i < num_elements; ++i) { sum += input[i] * input[i] - 3 * foo * input[i]; } } checkResults(input, sum, foo); } void BM_dispenso(benchmark::State& state) { const int num_threads = state.range(0) - 1; const int num_elements = state.range(1); dispenso::ThreadPool pool(num_threads); int64_t sum = 0; int foo = 0; dispenso::ParForOptions options; options.minItemsPerChunk = 50000; auto& input = getInputs(num_elements); for (auto UNUSED_VAR : state) { dispenso::TaskSet tasks(pool); std::vector sums; sums.reserve(num_threads + 1); ++foo; dispenso::parallel_for( tasks, sums, []() { return int64_t{0}; }, dispenso::makeChunkedRange(0, num_elements, dispenso::ParForChunking::kAuto), [&input, foo](int64_t& lsumStore, size_t i, size_t end) { int64_t lsum = 0; for (; i != end; ++i) { lsum += input[i] * input[i] - 3 * foo * input[i]; } lsumStore += lsum; }, options); sum = 0; for (auto s : sums) { sum += s; } } checkResults(input, sum, foo); } #if defined(_OPENMP) void BM_omp(benchmark::State& state) { const int num_threads = state.range(0); const int num_elements = state.range(1); omp_set_num_threads(num_threads); int64_t sum = 0; int foo = 0; auto& input = getInputs(num_elements); for (auto UNUSED_VAR : state) { sum = 0; ++foo; #pragma omp parallel for reduction(+ : sum) for (int i = 0; i < num_elements; ++i) { sum += input[i] * input[i] - 3 * foo * input[i]; } } checkResults(input, sum, foo); } #endif /*defined(_OPENMP)*/ #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb(benchmark::State& state) { const int num_threads = state.range(0); const int num_elements = state.range(1); int64_t sum = 0; int foo = 0; auto& input = getInputs(num_elements); for (auto UNUSED_VAR : state) { tbb_compat::task_scheduler_init initsched(num_threads); ++foo; sum = tbb::parallel_reduce( tbb::blocked_range(&input[0], &input[0] + num_elements), int64_t{0}, [foo](const tbb::blocked_range& r, int64_t init) -> int64_t { for (const int* a = r.begin(); a != r.end(); ++a) init += *a * *a - 3 * foo * *a; return init; }, [](int64_t x, int64_t y) -> int64_t { return x + y; }); } checkResults(input, sum, foo); } #endif // !BENCHMARK_WITHOUT_TBB void BM_dispenso_static(benchmark::State& state) { const int num_threads = state.range(0) - 1; const int num_elements = state.range(1); dispenso::ThreadPool pool(num_threads); int64_t sum = 0; int foo = 0; dispenso::ParForOptions options; options.minItemsPerChunk = 50000; auto& input = getInputs(num_elements); for (auto UNUSED_VAR : state) { dispenso::TaskSet tasks(pool); std::vector sums; sums.reserve(num_threads + 1); ++foo; dispenso::parallel_for( tasks, sums, []() { return int64_t{0}; }, dispenso::makeChunkedRange(0, num_elements, dispenso::ParForChunking::kStatic), [&input, foo](int64_t& lsumStore, size_t i, size_t end) { int64_t lsum = 0; for (; i != end; ++i) { lsum += input[i] * input[i] - 3 * foo * input[i]; } lsumStore += lsum; }, options); sum = 0; for (auto s : sums) { sum += s; } } checkResults(input, sum, foo); } static void CustomArguments(benchmark::internal::Benchmark* b) { for (int j : {kSmallSize, kMediumSize, kLargeSize}) { for (int i : pow2HalfStepThreads()) { b->Args({i, j}); } } } BENCHMARK_TEMPLATE(BM_serial, kSmallSize); BENCHMARK_TEMPLATE(BM_serial, kMediumSize); BENCHMARK_TEMPLATE(BM_serial, kLargeSize); #if defined(_OPENMP) 
BENCHMARK(BM_omp)->Apply(CustomArguments)->UseRealTime(); #endif // OPENMP #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb)->Apply(CustomArguments)->UseRealTime(); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso)->Apply(CustomArguments)->UseRealTime(); BENCHMARK(BM_dispenso_static)->Apply(CustomArguments)->UseRealTime(); BENCHMARK_MAIN(); ================================================ FILE: benchmarks/tbb_compat.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once /** * TBB compatibility header for supporting both legacy TBB and oneTBB (2021+). * * oneTBB 2021+ removed task_scheduler_init in favor of global_control and task_arena. * oneTBB also changed pipeline.h to parallel_pipeline.h and renamed filter types. * This header provides a unified interface that works with both versions. */ #if !defined(BENCHMARK_WITHOUT_TBB) // Include version header to detect TBB version #if __has_include() #include #elif __has_include() #include #endif // Detect oneTBB (2021+) vs legacy TBB #if defined(TBB_VERSION_MAJOR) && TBB_VERSION_MAJOR >= 2021 #define TBB_USE_ONETBB 1 #else #define TBB_USE_ONETBB 0 #endif #if TBB_USE_ONETBB // oneTBB 2021+ uses global_control instead of task_scheduler_init #include #include #include #include namespace tbb_compat { // RAII wrapper for thread count control, compatible with old task_scheduler_init API class task_scheduler_init { public: explicit task_scheduler_init(int num_threads) : control_(tbb::global_control::max_allowed_parallelism, num_threads) {} private: tbb::global_control control_; }; // Pipeline filter mode compatibility // In oneTBB, tbb::filter::serial -> tbb::filter_mode::serial_in_order // In oneTBB, tbb::filter::parallel -> tbb::filter_mode::parallel namespace filter { constexpr auto serial = tbb::filter_mode::serial_in_order; constexpr auto parallel = tbb::filter_mode::parallel; } // namespace filter } // namespace tbb_compat #else // Legacy TBB (< 2021) #include #include #include namespace tbb_compat { using task_scheduler_init = tbb::task_scheduler_init; // Pipeline filter mode compatibility - just alias the legacy types namespace filter { constexpr auto serial = tbb::filter::serial; constexpr auto parallel = tbb::filter::parallel; } // namespace filter } // namespace tbb_compat #endif // TBB_USE_ONETBB #endif // !BENCHMARK_WITHOUT_TBB ================================================ FILE: benchmarks/thread_benchmark_common.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
*/ #pragma once #ifdef _POSIX_C_SOURCE #include #endif // _POSIX_C_SOURCE #include #include #include #include "benchmark_common.h" inline std::vector pow2HalfStepThreads() { const int kRunningThreads = std::thread::hardware_concurrency(); std::vector result; result.push_back(1); for (int block = 2; block <= kRunningThreads; block *= 2) { int step = block / 2; for (int i = block; i < 2 * block && i <= kRunningThreads; i += step) { result.push_back(i); } } return result; } #if defined(_POSIX_C_SOURCE) || defined(__MACH__) struct rusage g_rusage; inline void startRusage() { std::atomic_thread_fence(std::memory_order_acquire); getrusage(RUSAGE_SELF, &g_rusage); std::atomic_thread_fence(std::memory_order_release); } inline double duration(struct timeval start, struct timeval end) { return (end.tv_sec + 1e-6 * end.tv_usec) - (start.tv_sec + 1e-6 * start.tv_usec); } inline void endRusage(benchmark::State& state) { std::atomic_thread_fence(std::memory_order_acquire); struct rusage res; getrusage(RUSAGE_SELF, &res); std::atomic_thread_fence(std::memory_order_release); double userTime = duration(g_rusage.ru_utime, res.ru_utime); double sysTime = duration(g_rusage.ru_stime, res.ru_stime); state.counters["\t0 User"] = userTime; state.counters["\t1 System"] = sysTime; } #else inline void startRusage() {} inline void endRusage(benchmark::State& state) {} #endif //_POSIX_C_SOURCE inline double getMean(const std::vector& data) { double sum = 0.0; for (auto d : data) { sum += d; } return sum / data.size(); } inline double getStddev(double mean, const std::vector& data) { double sumsq = 0.0; for (auto d : data) { auto dev = mean - d; sumsq += dev * dev; } return std::sqrt(sumsq / data.size()); } void doStats(const std::vector& times, benchmark::State& state) { double mean = getMean(times); state.counters["mean"] = mean; state.counters["stddev"] = getStddev(mean, times); } ================================================ FILE: benchmarks/timed_task_benchmark.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include #include #include #include #include "thread_benchmark_common.h" #if !defined(BENCHMARK_WITHOUT_FOLLY) #include #include #endif // !BENCHMARK_WITHOUT_FOLLY size_t getIterations() { // If we want to get a sense of system overhead via getrusage, we need to boost things into the // milliseconds range to get any kind of reasonable values. Typically on Linux I'm seeing that // Folly uses slightly more resources (system + user) on average, but that dispenso uses more in // the worst cases. The very worst case is still less than 1% of 1 cpu (~8 seconds active, // 50ms of system+user time). 
static const bool itersEnv = getenv("TEST_WITH_MANY_ITERS") != nullptr; if (itersEnv) { return 2000; } else { return 200; } } void absTimesToErrors(std::vector& times, double prevTime, double expectedDelta) { for (size_t i = 0; i < times.size(); ++i) { auto temp = times[i]; times[i] -= prevTime; times[i] -= expectedDelta; times[i] = std::abs(times[i]); prevTime = temp; } } void absTimesToSteadyErrors(std::vector& times, double prevTime, double expectedDelta) { for (size_t i = 0; i < times.size(); ++i) { times[i] -= prevTime; times[i] -= expectedDelta; times[i] = std::abs(times[i]); prevTime += expectedDelta; } } #if !defined(BENCHMARK_WITHOUT_FOLLY) template void BM_folly(benchmark::State& state) { std::vector times(getIterations()); std::atomic count(0); startRusage(); folly::FunctionScheduler fs; if (kSteady) { fs.setSteady(true); } folly::Baton fin; fs.addFunction( [&] { auto cur = count.fetch_add(1, std::memory_order_acq_rel); if (cur < times.size()) { times[cur] = dispenso::getTime(); } else if (cur == times.size()) { fin.post(); } }, std::chrono::milliseconds(kInMs), "add"); double prevTime = dispenso::getTime(); fs.start(); fin.wait(); fs.shutdown(); for (auto UNUSED_VAR : state) { // dummy iteration state.SetIterationTime(0.1); } if (kSteady) { // We need to adjust prevTime because for steady scheduling, folly tries to schedule right away. absTimesToSteadyErrors(times, prevTime - kInMs * 1e-3, kInMs * 1e-3); } else { absTimesToErrors(times, prevTime, kInMs * 1e-3); } endRusage(state); doStats(times, state); } struct FollyItem { std::vector times; std::atomic count{0}; size_t millis; folly::Baton fin; FollyItem() : times(getIterations()) {} }; template void BM_folly_mixed(benchmark::State& state) { FollyItem items[3]; items[0].millis = 1; items[1].millis = 4; items[2].millis = 3; startRusage(); folly::FunctionScheduler fs; if (kSteady) { fs.setSteady(true); } auto doSchedule = [&fs](FollyItem& fitem, const char* name) { fs.addFunction( [&] { auto cur = fitem.count.fetch_add(1, std::memory_order_acq_rel); if (cur < fitem.times.size()) { fitem.times[cur] = dispenso::getTime(); } else if (cur == fitem.times.size()) { fitem.fin.post(); } }, std::chrono::milliseconds(fitem.millis), name); }; size_t i = 0; for (auto name : {"a", "b", "c"}) { doSchedule(items[i++], name); } double prevTime = dispenso::getTime(); fs.start(); for (auto& item : items) { item.fin.wait(); } fs.shutdown(); for (auto UNUSED_VAR : state) { // dummy iteration state.SetIterationTime(0.1); } if (kSteady) { // We need to adjust prevTime because for steady scheduling, folly tries to schedule right away. 
for (auto& item : items) { absTimesToSteadyErrors(item.times, prevTime - item.millis * 1e-3, item.millis * 1e-3); } } else { for (auto& item : items) { absTimesToErrors(item.times, prevTime, item.millis * 1e-3); } } endRusage(state); // append all times items[0].times.insert(items[0].times.end(), items[1].times.begin(), items[1].times.end()); items[0].times.insert(items[0].times.end(), items[2].times.begin(), items[2].times.end()); doStats(items[0].times, state); } #endif dispenso::TimedTaskScheduler& getScheduler() { static const bool rtEnv = getenv("TEST_WITH_REALTIME") != nullptr; if (rtEnv) { static dispenso::TimedTaskScheduler sched(dispenso::ThreadPriority::kRealtime); return sched; } else { return dispenso::globalTimedTaskScheduler(); } } template void BM_dispenso(benchmark::State& state) { std::vector times(getIterations()); std::atomic count(0); double period = 1e-3 * kInMs; startRusage(); dispenso::CompletionEvent fin; double prevTime = dispenso::getTime(); auto type = kSteady ? dispenso::TimedTaskType::kSteady : dispenso::TimedTaskType::kNormal; auto task = getScheduler().schedule( dispenso::kImmediateInvoker, [&] { auto cur = count.fetch_add(1, std::memory_order_acq_rel); if (cur < times.size()) { times[cur] = dispenso::getTime(); } if (cur + 1 == times.size()) { fin.notify(); return false; } return true; }, prevTime + period, period, times.size(), type); fin.wait(); for (auto UNUSED_VAR : state) { // dummy iteration state.SetIterationTime(0.1); } if (kSteady) { absTimesToSteadyErrors(times, prevTime, kInMs * 1e-3); } else { absTimesToErrors(times, prevTime, kInMs * 1e-3); } endRusage(state); doStats(times, state); } struct DispensoItem { std::vector times; std::atomic count{0}; size_t millis; dispenso::CompletionEvent fin; DispensoItem() : times(getIterations()) {} }; template void BM_dispenso_mixed(benchmark::State& state) { DispensoItem items[3]; std::deque tasks; items[0].millis = 1; items[1].millis = 4; items[2].millis = 3; startRusage(); double prevTime = dispenso::getTime(); auto type = kSteady ? 
dispenso::TimedTaskType::kSteady : dispenso::TimedTaskType::kNormal; auto doSchedule = [&](DispensoItem& ditem) { double period = 1e-3 * ditem.millis; tasks.push_back( getScheduler().schedule( dispenso::kImmediateInvoker, [&] { auto cur = ditem.count.fetch_add(1, std::memory_order_acq_rel); if (cur < ditem.times.size()) { ditem.times[cur] = dispenso::getTime(); } if (cur + 1 == ditem.times.size()) { ditem.fin.notify(); return false; } return true; }, prevTime + period, period, ditem.times.size(), type)); }; for (size_t i = 0; i < 3; ++i) { doSchedule(items[i]); } for (auto& item : items) { item.fin.wait(); } for (auto UNUSED_VAR : state) { // dummy iteration state.SetIterationTime(0.1); } if (kSteady) { for (auto& item : items) { absTimesToSteadyErrors(item.times, prevTime, item.millis * 1e-3); } } else { for (auto& item : items) { absTimesToErrors(item.times, prevTime, item.millis * 1e-3); } } endRusage(state); // append all times items[0].times.insert(items[0].times.end(), items[1].times.begin(), items[1].times.end()); items[0].times.insert(items[0].times.end(), items[2].times.begin(), items[2].times.end()); doStats(items[0].times, state); } BENCHMARK_TEMPLATE2(BM_dispenso, 2, false)->UseManualTime(); BENCHMARK_TEMPLATE2(BM_dispenso, 4, false)->UseManualTime(); BENCHMARK_TEMPLATE2(BM_dispenso, 6, false)->UseManualTime(); BENCHMARK_TEMPLATE2(BM_dispenso, 2, true)->UseManualTime(); BENCHMARK_TEMPLATE2(BM_dispenso, 4, true)->UseManualTime(); BENCHMARK_TEMPLATE2(BM_dispenso, 6, true)->UseManualTime(); BENCHMARK_TEMPLATE(BM_dispenso_mixed, false)->UseManualTime(); BENCHMARK_TEMPLATE(BM_dispenso_mixed, true)->UseManualTime(); #if !defined(BENCHMARK_WITHOUT_FOLLY) BENCHMARK_TEMPLATE2(BM_folly, 2, false)->UseManualTime(); BENCHMARK_TEMPLATE2(BM_folly, 4, false)->UseManualTime(); BENCHMARK_TEMPLATE2(BM_folly, 6, false)->UseManualTime(); BENCHMARK_TEMPLATE2(BM_folly, 2, true)->UseManualTime(); BENCHMARK_TEMPLATE2(BM_folly, 4, true)->UseManualTime(); BENCHMARK_TEMPLATE2(BM_folly, 6, true)->UseManualTime(); BENCHMARK_TEMPLATE(BM_folly_mixed, false)->UseManualTime(); BENCHMARK_TEMPLATE(BM_folly_mixed, true)->UseManualTime(); #endif // FOLLY ================================================ FILE: benchmarks/trivial_compute_benchmark.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include #include #if defined(_OPENMP) #include #endif #if !defined(BENCHMARK_WITHOUT_TBB) #include "tbb/blocked_range.h" #include "tbb/parallel_reduce.h" #include "tbb_compat.h" #endif // !BENCHMARK_WITHOUT_TBB #include "thread_benchmark_common.h" static constexpr int kSmallSize = 100; static constexpr int kMediumSize = 1000000; static constexpr int kLargeSize = 100000000; uint32_t getInputs(int num_elements) { srand(num_elements); return rand() & 127; } inline uint64_t calculate(uint64_t input, uint64_t index, size_t foo) { return std::cos( std::log( std::sin(std::exp(std::sqrt(static_cast((input ^ index) - 3 * foo * input)))))); } void checkResults(uint32_t input, uint64_t actual, int foo, size_t num_elements) { if (!foo) return; if (input != getInputs(num_elements)) { std::cerr << "Failed to recover input!" << std::endl; abort(); } uint64_t expected = 0; for (size_t i = 0; i < num_elements; ++i) { expected += calculate(input, i, foo); } if (expected != actual) { std::cerr << "FAIL! 
" << expected << " vs " << actual << std::endl; abort(); } } template void BM_serial(benchmark::State& state) { auto input = getInputs(num_elements); uint64_t sum = 0; int foo = 0; for (auto UNUSED_VAR : state) { sum = 0; ++foo; for (size_t i = 0; i < num_elements; ++i) { sum += calculate(input, i, foo); } } checkResults(input, sum, foo, num_elements); } void BM_dispenso(benchmark::State& state) { const int num_threads = state.range(0) - 1; const int num_elements = state.range(1); dispenso::ThreadPool pool(num_threads); uint64_t sum = 0; int foo = 0; dispenso::ParForOptions options; options.defaultChunking = dispenso::ParForChunking::kAuto; options.minItemsPerChunk = 4000; auto input = getInputs(num_elements); for (auto UNUSED_VAR : state) { dispenso::TaskSet tasks(pool); std::vector sums; sums.reserve(num_threads + 1); ++foo; dispenso::parallel_for( tasks, sums, []() { return uint64_t{0}; }, 0, num_elements, [input, foo](uint64_t& lsumStore, int i, int end) { uint64_t lsum = 0; for (; i != end; ++i) { lsum += calculate(input, i, foo); } lsumStore += lsum; }, options); sum = 0; for (auto s : sums) { sum += s; } } checkResults(input, sum, foo, num_elements); } #if defined(_OPENMP) void BM_omp(benchmark::State& state) { const int num_threads = state.range(0); const int num_elements = state.range(1); omp_set_num_threads(num_threads); uint64_t sum = 0; int foo = 0; auto input = getInputs(num_elements); for (auto UNUSED_VAR : state) { sum = 0; ++foo; #pragma omp parallel for reduction(+ : sum) for (int i = 0; i < num_elements; ++i) { sum += calculate(input, i, foo); } } checkResults(input, sum, foo, num_elements); } #endif /* defined(_OPENMP)*/ #if !defined(BENCHMARK_WITHOUT_TBB) void BM_tbb(benchmark::State& state) { const int num_threads = state.range(0); const int num_elements = state.range(1); uint64_t sum = 0; int foo = 0; auto input = getInputs(num_elements); for (auto UNUSED_VAR : state) { tbb_compat::task_scheduler_init initsched(num_threads); ++foo; sum = tbb::parallel_reduce( tbb::blocked_range(0, num_elements), uint64_t{0}, [input, foo](const tbb::blocked_range& r, uint64_t init) -> uint64_t { for (size_t a = r.begin(); a != r.end(); ++a) init += calculate(input, a, foo); return init; }, [](uint64_t x, uint64_t y) -> uint64_t { return x + y; }); } checkResults(input, sum, foo, num_elements); } #endif // !BENCHMARK_WITHOUT_TBB void BM_dispenso_static(benchmark::State& state) { const int num_threads = state.range(0) - 1; const int num_elements = state.range(1); dispenso::ThreadPool pool(num_threads); uint64_t sum = 0; int foo = 0; dispenso::ParForOptions options; options.minItemsPerChunk = 4000; auto input = getInputs(num_elements); for (auto UNUSED_VAR : state) { dispenso::TaskSet tasks(pool); std::vector sums; sums.reserve(num_threads + 1); ++foo; dispenso::parallel_for( tasks, sums, []() { return uint64_t{0}; }, dispenso::makeChunkedRange(0, num_elements, dispenso::ParForChunking::kStatic), [input, foo](uint64_t& lsumStore, int i, int end) { uint64_t lsum = 0; for (; i != end; ++i) { lsum += calculate(input, i, foo); } lsumStore += lsum; }, options); sum = 0; for (auto s : sums) { sum += s; } } checkResults(input, sum, foo, num_elements); } static void CustomArguments(benchmark::internal::Benchmark* b) { for (int j : {kSmallSize, kMediumSize, kLargeSize}) { for (int i : pow2HalfStepThreads()) { b->Args({i, j}); } } } BENCHMARK_TEMPLATE(BM_serial, kSmallSize); BENCHMARK_TEMPLATE(BM_serial, kMediumSize); BENCHMARK_TEMPLATE(BM_serial, kLargeSize); #if defined(_OPENMP) 
BENCHMARK(BM_omp)->Apply(CustomArguments)->UseRealTime(); #endif // OPENMP #if !defined(BENCHMARK_WITHOUT_TBB) BENCHMARK(BM_tbb)->Apply(CustomArguments)->UseRealTime(); #endif // !BENCHMARK_WITHOUT_TBB BENCHMARK(BM_dispenso)->Apply(CustomArguments)->UseRealTime(); BENCHMARK(BM_dispenso_static)->Apply(CustomArguments)->UseRealTime(); BENCHMARK_MAIN(); ================================================ FILE: cmake/DispensoConfig.cmake.in ================================================ # # Copyright (c) Meta Platforms, Inc. and affiliates. # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. # @PACKAGE_INIT@ include(CMakeFindDependencyMacro) find_dependency(Threads) if(@DISPENSO_USE_SYSTEM_CONCURRENTQUEUE@) find_dependency(concurrentqueue) endif() include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@_Exports.cmake") check_required_components("@PROJECT_NAME@") ================================================ FILE: codecov.yml ================================================ # Codecov configuration for dispenso # See https://docs.codecov.com/docs/codecovyml-reference coverage: # Overall project coverage settings status: project: default: # Require at least 92% overall coverage target: 92% patch: default: # New code should have at least 80% coverage target: 80% # This is informational, not blocking informational: true # Round coverage to 1 decimal place precision: 1 # Ignore third-party code and test files ignore: - "dispenso/third-party/**" - "tests/**" - "benchmarks/**" # Comment settings for PRs comment: layout: "reach,diff,flags,files" behavior: default require_changes: true ================================================ FILE: dispenso/CMakeLists.txt ================================================ # Copyright (c) Meta Platforms, Inc. and affiliates. # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. cmake_minimum_required(VERSION 3.12) file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS *.cpp) file(GLOB_RECURSE HEADERS CONFIGURE_DEPENDS *.h) if(NOT DISPENSO_BUILD_FAST_MATH) list(FILTER SOURCES EXCLUDE REGEX "fast_math/") list(FILTER HEADERS EXCLUDE REGEX "fast_math/") endif() message("SOURCES: ${SOURCES}") if(DISPENSO_SHARED_LIB) add_compile_definitions(DISPENSO_SHARED_LIB DISPENSO_LIB_EXPORT) add_library(dispenso SHARED ${SOURCES} ${HEADERS}) target_compile_options(dispenso PRIVATE $<$>:-fvisibility=hidden> ) else() add_library(dispenso STATIC ${SOURCES} ${HEADERS}) endif() target_compile_options(dispenso PRIVATE $<$:/W3 /WX> # GCC false positive: stringop-overflow through deeply inlined atomics. # Seen with GCC 13 on Linux and GCC 14 on macOS ARM64. $<$:-Wno-stringop-overflow> $<$>:-Wall -Wextra -pedantic -Wconversion -Wno-sign-conversion -Werror> ) if(WIN32) target_compile_definitions(dispenso PUBLIC NOMINMAX) endif() target_include_directories(dispenso PUBLIC $ $ $ ) if(DISPENSO_USE_SYSTEM_CONCURRENTQUEUE) find_package(concurrentqueue CONFIG REQUIRED) target_link_libraries(dispenso PUBLIC concurrentqueue::concurrentqueue) else() target_include_directories(dispenso PUBLIC $ $ ) endif() # Export the C++ standard requirement so downstream consumers compile with at # least the same standard dispenso was built with (default: C++14). 
target_compile_features(dispenso PUBLIC cxx_std_${CMAKE_CXX_STANDARD}) set(CMAKE_THREAD_PREFER_PTHREAD TRUE) set(THREADS_PREFER_PTHREAD_FLAG TRUE) find_package(Threads REQUIRED) target_link_libraries(dispenso PUBLIC Threads::Threads) check_cxx_source_compiles(" #include #include std::atomic a(0); std::atomic b(0); std::atomic c(0); std::atomic d(0); int main() { ++a; ++b; ++c; return ++d; } " DISPENSO_HAS_ATOMIC_WITHOUT_LIB) if (NOT DISPENSO_HAS_ATOMIC_WITHOUT_LIB) target_link_libraries(dispenso PUBLIC atomic) endif() if(WIN32) target_link_libraries(dispenso PUBLIC Synchronization Winmm) endif() if (NOT DISPENSO_STANDALONE) return() endif() ## Install library ## set_target_properties(dispenso PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR}) install(TARGETS dispenso EXPORT ${PROJECT_NAME}_Exports LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} NAMELINK_SKIP # on Windows put the dlls into bin RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} # ... and the import lib into the devel package ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} ) install(EXPORT ${PROJECT_NAME}_Exports DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}-${PROJECT_VERSION} NAMESPACE Dispenso:: ) install(TARGETS dispenso LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} NAMELINK_ONLY RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} ) ## Install headers ## # Build list of directory patterns to exclude from header installation. set(_dispenso_install_excludes "") if(NOT DISPENSO_BUILD_FAST_MATH) list(APPEND _dispenso_install_excludes PATTERN "fast_math" EXCLUDE) endif() if(DISPENSO_USE_SYSTEM_CONCURRENTQUEUE) list(APPEND _dispenso_install_excludes PATTERN "third-party" EXCLUDE) endif() install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} FILES_MATCHING PATTERN *.h ${_dispenso_install_excludes} ) ## Generate and install CMake target exports ## include(CMakePackageConfigHelpers) configure_package_config_file( "${PROJECT_SOURCE_DIR}/cmake/${PROJECT_NAME}Config.cmake.in" "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake" INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}-${PROJECT_VERSION} ) write_basic_package_version_file( "${PROJECT_NAME}ConfigVersion.cmake" VERSION ${PROJECT_VERSION} COMPATIBILITY SameMajorVersion ) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}-${PROJECT_VERSION} ) ================================================ FILE: dispenso/async_request.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file async_request.h * @ingroup group_async * A file providing AsyncRequest. This is a bit like a lightweight channel for storing updates to * one object, mostly intended to be used as a single producer, single consumer update mechanism. **/ #pragma once #if __cplusplus >= 201703L #include #else #include #endif // C++17 #include namespace dispenso { /** * A type for making async requests. Although it is safe to use from multiple producers and * consumers, it is primarily intended to be used from single producer, single consumer. * * Typically the consumer will request an update of the value from thread 0, and the producer will * look whether an update was requested from thread 1. 
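 *
 * For illustration, a minimal sketch of that handshake (the thread split, the Config type, and
 * the apply()/buildConfig() helpers are illustrative, not part of the API):
 *
 *   dispenso::AsyncRequest<Config> latest;
 *
 *   // Thread 0 (consumer)
 *   latest.requestUpdate();
 *   // ... later ...
 *   if (auto update = latest.getUpdate()) {
 *     apply(*update);
 *   }
 *
 *   // Thread 1 (producer)
 *   if (latest.updateRequested()) {
 *     latest.tryEmplaceUpdate(buildConfig());
 *   }
 *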
Once the producer determines an update was * requested (updateRequested() returns true), it calls tryEmplaceUpdate() to update the underlying * data. Then when the consumer on thread 0 next calls getUpdate(), an optional wrapper to the * updated data is returned, and the AsyncRequest object is reset (it no longer has valid data, and * no update will have yet been requested for the next update). **/ template class AsyncRequest { public: // A lightweight std::optional-like type with a subset of functionality. #if __cplusplus >= 201703L using OpResult = std::optional; #else using OpResult = detail::OpResult; #endif // C++17 /** * The consumer can call this to request an update to the underlying data. If request has already * been made or fulfilled, this is a no-op. **/ void requestUpdate() { RequestState state = kNone; state_.compare_exchange_strong(state, kNeedsUpdate, std::memory_order_acq_rel); } /** * The producer can check this to determine if an update is needed. * * @return true if an update is required, false otherwise. **/ bool updateRequested() const { return state_.load(std::memory_order_acquire) == kNeedsUpdate; } /** * The producer can try to emplace a new T object in response to a request. * @param args The arguments to emplace. * @return true if the underlying data was updated. false if the underlying data is not in need * of an update. * @note For cases where calling this superflously could be expensive, it is wise to check * updateRequested() first. **/ template bool tryEmplaceUpdate(Args&&... args) { RequestState state = kNeedsUpdate; if (!state_.compare_exchange_strong(state, kUpdating, std::memory_order_acq_rel)) { return false; } obj_.emplace(std::forward(args)...); state_.store(kReady, std::memory_order_release); return true; } /** * The consumer can attempt to get an update. * @return An optional wrapper to the underlying data. If no update is ready, nullopt is * returned. Once an update has been returned, the AsyncRequest object is returned to a state with * no underlying data. **/ OpResult getUpdate() { if (state_.load(std::memory_order_acquire) == kReady) { auto obj = std::move(obj_); state_.store(kNone, std::memory_order_release); return obj; } return {}; } private: enum RequestState { kNone, kNeedsUpdate, kUpdating, kReady }; alignas(kCacheLineSize) std::atomic state_ = {kNone}; OpResult obj_; }; } // namespace dispenso ================================================ FILE: dispenso/completion_event.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file completion_event.h * @ingroup group_async * A file providing a CompletionEvent type, which gives a way to signal to waiting threads that some * event has been completed. **/ #pragma once #include #include namespace dispenso { /** * A class which can be used for one-time notify/wait scenarios. It is basically a way to signal to * any waiting threads that some event has completed. There must be a single publisher thread * and zero or more waiters on arbitrary threads. reset may be called to restart a * sequence (e.g. after notify occurs and all waiters have successfully exited * wait*). **/ class CompletionEvent { public: /** * Notify any waiting threads that the event has completed. It is safe for this to be called * before threads call wait. 
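 *
 * A minimal publisher/waiter sketch (the worker thread and the produce()/consumeResult()
 * helpers are illustrative):
 *
 *   dispenso::CompletionEvent done;
 *   std::thread waiter([&done]() {
 *     done.wait();      // blocks until notify() is called below
 *     consumeResult();
 *   });
 *   produce();          // work on the publishing thread
 *   done.notify();
 *   waiter.join();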
**/ void notify() { impl_.notify(1); } /** * Wait for another thread to notify **/ void wait() const { impl_.wait(1); } /** * Peek to see if the event has been notified in any thread **/ bool completed() const { return impl_.intrusiveStatus().load(std::memory_order_acquire); } /** * Wait for another thread to notify or for the relative timeout to expire, whichever * is first. * * @return true if status is "completed", false if timed out. **/ template bool waitFor(const std::chrono::duration& relTime) const { return impl_.waitFor(1, relTime); } /** * Wait for another thread to notify or for the absolute timeout to expire, whichever * is first. * * @return true if status is "completed", false if timed out. **/ template bool waitUntil(const std::chrono::time_point& absTime) const { return impl_.waitUntil(1, absTime); } /** * Resets the event to "not-completed". This should not be called while an active * wait*\/notify sequence is still currently in play. **/ void reset() { impl_.intrusiveStatus().store(0, std::memory_order_seq_cst); } private: detail::CompletionEventImpl impl_{0}; }; } // namespace dispenso ================================================ FILE: dispenso/concurrent_object_arena.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file concurrent_object_arena.h * @ingroup group_containers * A file providing concurrent object arena container. **/ #pragma once #include #include #include #include #include #include namespace detail { template constexpr T log2i(const T v) { T log2 = 0, val = v; while (val >>= 1) ++log2; return log2; } } // namespace detail namespace dispenso { /** * ConcurrentObjectArena is an indexed sequence container that * allows concurrent insertion to its end. Insertion never invalidates pointers * or references to the rest of the elements. As opposet to std::vector, the * elements of a ConcurrentObjectArena are not stored * contiguously. In memory it is sequence of individually allocated fixed-size * arrays, with additional bookkeeping. The size of arrays is always power of * two (to optimize indexed access) *
 *  buffers  |<────     bufferSize      ────>|
 *    ┌─┐    ┌──────────────────────────────┐
 *    │*├───>│                              │
 *    ├─┤    ├──────────────────────────────┤
 *    │*├───>│                              │
 *    ├─┤    ├──────────────────────────────┤
 *    │*├───>│                              │
 *    └─┘    └──────────────────────────────┘
 *
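 * A minimal usage sketch (assumes default template parameters beyond the element type; the
 * doWork() helper is illustrative):
 *
 *   dispenso::ConcurrentObjectArena<int> arena(512);  // buffer size rounds up to a power of two
 *
 *   // Safe to call concurrently from multiple threads:
 *   auto first = arena.grow_by(16);                   // index of the first of 16 new elements
 *   for (auto i = first; i < first + 16; ++i) {
 *     arena[i] = doWork(i);                           // existing references stay valid across growth
 *   }
 *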
**/ template struct ConcurrentObjectArena { ConcurrentObjectArena() = delete; /** * Construct a ConcurrentObjectArena with given or bigger contiguous array size * * @param minBuffSize The minimum size of the internal buffer. If given * size is not power of 2 the closest bigger power of two would be chosen. **/ explicit ConcurrentObjectArena(const Index minBuffSize) : kLog2BuffSize( ::detail::log2i(minBuffSize) + ((Index{1} << ::detail::log2i(minBuffSize)) == minBuffSize ? 0 : 1)), kBufferSize(Index{1} << kLog2BuffSize), kMask((Index{1} << kLog2BuffSize) - 1), pos_(0), allocatedSize_(0), buffers_(nullptr), buffersSize_(0), buffersPos_(0) { allocateBuffer(); allocatedSize_.store(kBufferSize, std::memory_order_relaxed); } /** * Copy constructor **/ ConcurrentObjectArena(const ConcurrentObjectArena& other) : kLog2BuffSize(other.kLog2BuffSize), kBufferSize(other.kBufferSize), kMask(other.kMask), pos_(other.pos_.load(std::memory_order_relaxed)), allocatedSize_(other.allocatedSize_.load(std::memory_order_relaxed)), buffersSize_(other.buffersSize_), buffersPos_(other.buffersPos_) { T** otherBuffers = other.buffers_.load(std::memory_order_acquire); T** newBuffers = new T*[buffersSize_]; for (Index i = 0; i < buffersSize_; ++i) { void* ptr = detail::alignedMalloc(kBufferSize * sizeof(T), alignment); #if defined(__cpp_exceptions) if (ptr == nullptr) throw std::bad_alloc(); #endif // __cpp_exceptions std::memcpy(ptr, otherBuffers[i], kBufferSize * sizeof(T)); newBuffers[i] = static_cast(ptr); } buffers_.store(newBuffers, std::memory_order_release); } /** * Move constructor **/ ConcurrentObjectArena(ConcurrentObjectArena&& other) noexcept : kLog2BuffSize(0), kBufferSize(0), kMask(0), pos_(0), allocatedSize_(0), buffers_(nullptr), buffersSize_(0), buffersPos_(0) { swap(*this, other); } ~ConcurrentObjectArena() { T** buffers = buffers_.load(std::memory_order_acquire); for (Index i = 0; i < buffersPos_; i++) detail::alignedFree(buffers[i]); delete[] buffers; for (T** p : deleteLater_) delete[] p; } /** * Copy assignment operator. This is not concurrency safe. **/ ConcurrentObjectArena& operator=( ConcurrentObjectArena const& other) { ConcurrentObjectArena copy(other); swap(*this, copy); return *this; } /** * Move assignment operator. This is not concurrency safe. **/ ConcurrentObjectArena& operator=( ConcurrentObjectArena&& other) noexcept { swap(*this, other); return *this; } /** * Grow a container * * This function is thread safe and never invalidates pointers or * references to the rest of the elements. It is lock-free if new elements * can be placed in current buffer. It locks if it allocates a new buffer. * @param delta New size of the container will be delta elements bigger. * @return index of the first element of the allocated group. **/ Index grow_by(const Index delta) { Index newPos; Index oldPos = pos_.load(std::memory_order_relaxed); do { Index curSize = allocatedSize_.load(std::memory_order_acquire); if (oldPos + delta >= curSize) { const std::lock_guard guard(resizeMutex_); curSize = allocatedSize_.load(std::memory_order_relaxed); while (oldPos + delta >= curSize) { allocateBuffer(); allocatedSize_.store(curSize + kBufferSize, std::memory_order_release); curSize = curSize + kBufferSize; } } newPos = oldPos + delta; } while (!std::atomic_compare_exchange_weak_explicit( &pos_, &oldPos, newPos, std::memory_order_release, std::memory_order_relaxed)); constructObjects(oldPos, oldPos + delta); return oldPos; } /** * Access an element of the object arena. Concurrency safe. 
* @param index The index of the element to access. * @return A reference to the element at index. * * @note references are stable even if other threads are inserting, but it is * still the users responsibility to avoid racing on the element itself. **/ inline const T& operator[](const Index index) const { const Index bufIndex = index >> kLog2BuffSize; const Index i = index & kMask; return buffers_.load(std::memory_order_acquire)[bufIndex][i]; } /** * Access an element of the object arena. Concurrency safe. * @param index The index of the element to access. * @return A reference to the element at index. * * @note references are stable even if other threads are inserting, but it is * still the users responsibility to avoid racing on the element itself. **/ inline T& operator[](const Index index) { return const_cast( const_cast&>(*this)[index]); } /** * Get the size of the object arena. Concurrency safe. * @return The number of elements in the object arena. Note that elements can be appended *concurrently with this call. **/ Index size() const { return pos_.load(std::memory_order_relaxed); } /** * The current capacity of the object arena. Concurrency safe. * @return The current capacity. Note that elements can be appended concurrently **/ Index capacity() const { return allocatedSize_.load(std::memory_order_relaxed); } /** * Number of the internal buffers. Concurrency safe. * @return The current number of buffers. Note that buffers can be appended concurrently **/ Index numBuffers() const { return buffersPos_; } /** * Get the pointer to the buffer. Concurrency safe. * @param index index of the buffer * @return The pointer to the buffer. **/ const T* getBuffer(const Index index) const { return buffers_.load(std::memory_order_acquire)[index]; } /** * Get the pointer to the buffer. Concurrency safe. * @param index index of the buffer * @return The pointer to the buffer. **/ T* getBuffer(const Index index) { return buffers_.load(std::memory_order_acquire)[index]; } /** * Get the used buffer size. not concurrency safe. * @param index index of the buffer. * @return The used buffer size. **/ Index getBufferSize(const Index index) const { const Index numBuffs = numBuffers(); assert(index < numBuffs); if (index < numBuffs - 1) return kBufferSize; else return pos_.load(std::memory_order_relaxed) - (kBufferSize * (numBuffs - 1)); } /** * Swap the contents of containers lhs, and rhs. This is not concurrency safe. 
* @param lhs object arena to swap * @param rhs object arena to swap **/ friend void swap( ConcurrentObjectArena& lhs, ConcurrentObjectArena& rhs) noexcept { using std::swap; swap(lhs.kLog2BuffSize, rhs.kLog2BuffSize); swap(lhs.kBufferSize, rhs.kBufferSize); swap(lhs.kMask, rhs.kMask); const Index rhs_pos = rhs.pos_.load(std::memory_order_relaxed); rhs.pos_.store(lhs.pos_.load(std::memory_order_relaxed), std::memory_order_relaxed); lhs.pos_.store(rhs_pos, std::memory_order_relaxed); const Index rhs_allocatedSize = rhs.allocatedSize_.load(std::memory_order_relaxed); rhs.allocatedSize_.store( lhs.allocatedSize_.load(std::memory_order_relaxed), std::memory_order_relaxed); lhs.allocatedSize_.store(rhs_allocatedSize, std::memory_order_relaxed); T** const rhs_buffers = rhs.buffers_.load(std::memory_order_acquire); rhs.buffers_.store(lhs.buffers_.load(std::memory_order_acquire), std::memory_order_release); lhs.buffers_.store(rhs_buffers, std::memory_order_release); swap(lhs.buffersSize_, rhs.buffersSize_); swap(lhs.buffersPos_, rhs.buffersPos_); swap(lhs.deleteLater_, rhs.deleteLater_); } private: void allocateBuffer() { void* ptr = detail::alignedMalloc(kBufferSize * sizeof(T), alignment); #if defined(__cpp_exceptions) if (ptr == nullptr) throw std::bad_alloc(); #endif // __cpp_exceptions if (buffersPos_ < buffersSize_) { buffers_.load(std::memory_order_acquire)[buffersPos_++] = static_cast(ptr); } else { const Index oldBuffersSize = buffersSize_; T** oldBuffers = buffers_.load(std::memory_order_acquire); buffersSize_ = oldBuffersSize == 0 ? 2 : oldBuffersSize * 2; T** newBuffers = new T*[buffersSize_]; if (oldBuffers != nullptr) { std::memcpy(newBuffers, oldBuffers, sizeof(T*) * oldBuffersSize); deleteLater_.push_back(oldBuffers); } newBuffers[buffersPos_++] = static_cast(ptr); buffers_.store(newBuffers, std::memory_order_release); } } void constructObjects(const Index beginIndex, const Index endIndex) { const Index startBuffer = beginIndex >> kLog2BuffSize; const Index endBuffer = endIndex >> kLog2BuffSize; Index bufStart = beginIndex & kMask; for (Index b = startBuffer; b <= endBuffer; ++b) { T* buf = buffers_.load(std::memory_order_acquire)[b]; const Index bufEnd = b == endBuffer ? (endIndex & kMask) : kBufferSize; for (Index i = bufStart; i < bufEnd; ++i) new (buf + i) T(); bufStart = 0; } } // // kBufferSize = 2^kLog2BuffSize // mask = 0b00011111 // ──┬── // └─number of 1s is log2BuffSize // std::mutex resizeMutex_; Index kLog2BuffSize; Index kBufferSize; Index kMask; std::atomic pos_; std::atomic allocatedSize_; std::atomic buffers_; Index buffersSize_; Index buffersPos_; std::vector deleteLater_; }; } // namespace dispenso ================================================ FILE: dispenso/concurrent_vector.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file concurrent_vector.h * @ingroup group_containers * A file providing a concurrent vector implementation. The basic implementation is similar to * common implementation of deques, with a two-level array structure. For ConcurrentVector, this is * an array of buffers that grow by powers of two. The interface is intended to be a reasonably * complete standin for both std::vector and tbb::concurrent_vector. When compared to std::vector, * it is missing only the .data() accessor, because it is not possible to access the contents as a * contiguous buffer. 
The interface is also compatible with TBB's concurrent_vector, but provides * slightly more functionality to be compatible with std::vector (e.g. .insert() and .erase()), and * also to enable higher performance without requiring double-initialization in some grow_by cases * (via .grow_by_generator()). ConcurrentVector also has a reserving Constructor that can allow for * better performance when the size (or maximum size, or even a guess at the size) is known ahead of * time. * * Like std::deque, and unlike std::vector, it is possible to use non-movable objects in * ConcurrentVector, and references are not invalidated when growing the ConcurrentVector. Iterators * are also stable under these conditions. One other important difference to the std containers, * and also to tbb::concurrentvector is that ConcurrentVector does not take an Allocator, but just * uses appropriately aligned malloc/free for allocation. * * Basically speaking, it is possible to grow the ConcurrentVector concurrently (e.g. via * .grow_by(), .emplace_back(), etc...) very quickly, and it is safe to iterate ranges of the vector * that have already been inserted (and e.g. .begin(), and .end() are thread-safe). As with TBB's * implementation, it is not safe to concurrently call .pop_back(), .reserve(), .resize(), etc... * The operation of these functions is lock-free if memory allocation is also lock-free. * * @note Synchronization model: concurrent growth operations (push_back, emplace_back, grow_by) * provide thread-safe index allocation, but do NOT guarantee that elements are visible to other * threads immediately. Reading an element that is being concurrently constructed by another thread * is a data race. Callers must establish their own happens-before relationship (e.g. via a mutex, * atomic flag, or task completion) between the writer finishing construction and the reader * accessing the element. This is consistent with tbb::concurrent_vector's model. * * As of this writing, with the benchmarks developed for testing ConcurrentVector, ConcurrentVector * appears to be faster than tbb::concurrent_vector by a factor of between about 15% and 3x * (depending on the operation). ConcurrentVector's iteration and random access is on-par with * std::deque (libstdc++), sometimes a bit faster, sometimes a bit slower, but .push_back() is about * an order of magnitude slower for serial use (note that by using one of the .grow_by() variants * could make this on-par for serial code, if applicable, see the "alternative" benchmarks). * * Most notably, in the parallel growth benchmarks, (on Clang 8, Linux, 32-core Threadripper * 2990WX), ConcurrentVector is between about 5x and 20x faster than std::vector + std::mutex, and * is about 1.6x faster than tbb::concurrent_vector. **/ #pragma once #include #include #include #include #include #include #include #include #include #include // Whether to use a non-atomic buffer pointer cache for read-hot paths. // Disabled on ARM where cache-line writes on every growth operation cause store buffer // pressure that exceeds the read-path benefit (acquire loads are effectively free for // sequential access patterns on ARM). #if !defined(DISPENSO_HAS_CACHED_PTRS) #if !defined(__aarch64__) && !defined(_M_ARM64) #define DISPENSO_HAS_CACHED_PTRS 1 #else #define DISPENSO_HAS_CACHED_PTRS 0 #endif #endif namespace dispenso { /** * Available strategies to use for ConcurrentVector reallocation. kFullBufferAhead means that once * we begin to use a buffer, we will also ensure that the next buffer is allocated. 
kHalfBufferAhead * means that once we begin to use an address halfway through the current buffer, we will allocate * the next buffer. kAsNeeded means that once we are ready to use an address in the next buffer (we * are using the last valid address in the current buffer), we allocate the next buffer. kAsNeeded * should be the default to avoid using too much memory, but a small speedup is possible due to less * thread waiting when the other options are used. kFullBufferAhead is fastest, then * kHalfBufferAhead, then kAsNeeded, and the correspondingly kFullBufferAhead uses the most memory, * then kHalfBufferAhead, and kAsNeeded uses the least. **/ enum class ConcurrentVectorReallocStrategy { kFullBufferAhead, kHalfBufferAhead, kAsNeeded }; struct ReserveTagS {}; /** * This ReserveTag can be passed into the reserving constructor for better performance, if even a * rough guess can be made at the number of elements that will be required. **/ constexpr ReserveTagS ReserveTag; // Textual inclusion. Includes undocumented implementation details, e.g. iterators. #include /** * The default ConcurrentVector traits type for defining capacities and max capacities. Both * members are required should one wish to supply a custom set of traits. **/ template struct DefaultConcurrentVectorSizeTraits { /** * @brief This is the starting user-expected capacity in number of elements (algorithm may provide * more based on kReallocStrategy) **/ static constexpr size_t kDefaultCapacity = (sizeof(T) >= 256) ? 2 : 512 / sizeof(T); /** * @brief The maximum possible size for the vector. * * The reason this exists is because if someone doesn't require vectors of length in e.g. * petabytes, the class can use less space. Additionally, some minor optimizations may be * possible if the max size is less than 32-bits. **/ static constexpr size_t kMaxVectorSize = (size_t{1} << (sizeof(size_t) * CHAR_BIT > 47 ? 47 : sizeof(size_t) * CHAR_BIT - 1)) / sizeof(T); }; /** * The default ConcurrentVector traits type. All members are required should one wish to supply a * custom set of traits. **/ struct DefaultConcurrentVectorTraits { /** * @brief Prefer to place the pointers to the buffers inline in the class. * * This can consume a lot of space, e.g. on the stack. This can be a couple of kilobytes. But * performance is improved by 5% to 15%. When set to false, this results in those pointers * residing in a separate heap allocation. **/ static constexpr bool kPreferBuffersInline = true; /** * @brief How far/whether to allocate before memory is required. * * We can allocate ahead, which may reduce the chances of another thread blocking while waiting * for memory to be allocated. This implies a pretty stiff memory overhead, which people may not * want to pay (up to 3x overhead after the first bucket for kFullBufferAhead). This can be set * to kAsNeeded instead of kFullBufferAhead, which makes overheads similar to typical std::vector * implementation (up to 2x overhead). Performance differences are modest, so unless you need to * squeeze the most out of your use, kAsNeeded is okay. **/ static constexpr ConcurrentVectorReallocStrategy kReallocStrategy = ConcurrentVectorReallocStrategy::kAsNeeded; /** * @brief Should we prefer faster, but larger iterators, or slower, but smaller iterators. * * If an algorithm needs to store iterators for later use, size overhead could become a * concern. By using kIteratorPreferSpeed == true, the size of an iterator will be roughly double * the iterator when kIteratorPreferSpeed == false. 
Conversely, iterate + dereference is also * about twice as fast. * **/ static constexpr bool kIteratorPreferSpeed = true; }; /** * A concurrent vector type. It is safe to call .push_back(), .emplace_back(), the various .grow_() * functions, .begin(), .end(), .size(), .empty() concurrently, and existing iterators and *references remain valid with these functions' use. **/ template < typename T, typename Traits = DefaultConcurrentVectorTraits, typename SizeTraits = DefaultConcurrentVectorSizeTraits> class ConcurrentVector { public: using value_type = T; using reference = T&; using const_reference = const T&; using size_type = size_t; using difference_type = ssize_t; using reference_type = T&; using const_reference_type = const T&; using pointer = T*; using const_pointer = const T*; using iterator = std::conditional_t< Traits::kIteratorPreferSpeed, cv::ConcurrentVectorIterator, T, false>, cv::CompactCVecIterator, T, false>>; using const_iterator = std::conditional_t< Traits::kIteratorPreferSpeed, cv::ConcurrentVectorIterator, T, true>, cv::CompactCVecIterator, T, true>>; using reverse_iterator = std::reverse_iterator; using const_reverse_iterator = std::reverse_iterator; /** * Default construct the ConcurrentVector. **/ ConcurrentVector() : ConcurrentVector(SizeTraits::kDefaultCapacity / 2, ReserveTag) {} /** * The reserving constructor. By supplying a reasonable starting capacity, e.g. close to max * expected vector size, this can often improve performance substantially by reducing allocations * and increasing data coherency. **/ ConcurrentVector(size_t startCapacity, ReserveTagS) : firstBucketShift_( detail::log2( detail::nextPow2(std::max(startCapacity, SizeTraits::kDefaultCapacity / 2)))), firstBucketLen_(size_type{1} << firstBucketShift_) { T* firstTwo = cv::alloc(2 * firstBucketLen_); initCachedPtrs(); setCachedPtr(0, firstTwo); setCachedPtr(1, firstTwo + firstBucketLen_); buffers_[0].store(firstTwo, std::memory_order_release); buffers_[1].store(firstTwo + firstBucketLen_, std::memory_order_release); } /** * Sizing constructor with default initialization. **/ explicit ConcurrentVector(size_t startSize) : ConcurrentVector(startSize, ReserveTag) { size_.store(startSize, std::memory_order_relaxed); T* buf = buffers_[0].load(std::memory_order_relaxed); for (size_t i = 0; i < startSize; ++i) { new (buf + i) T(); } } /** * Sizing constructor with specified default value. **/ ConcurrentVector(size_t startSize, const T& defaultValue) : ConcurrentVector(startSize, ReserveTag) { size_.store(startSize, std::memory_order_relaxed); T* buf = buffers_[0].load(std::memory_order_relaxed); for (size_t i = 0; i < startSize; ++i) { new (buf + i) T(defaultValue); } } /** * Constructor taking an iterator range. **/ template ConcurrentVector(InIterator start, InIterator end) : ConcurrentVector(std::distance(start, end), start, end) {} /** * Sizing constructor taking an iterator range. If size is known in advance, this may be faster * than just providing the iterator range, especially for input iterators that are not random * access. 
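 *
 * For example (a minimal sketch; the source container is illustrative):
 *
 *   std::vector<int> src = loadValues();  // hypothetical input
 *   dispenso::ConcurrentVector<int> vec(src.size(), src.begin(), src.end());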
**/ template ConcurrentVector(size_type startSize, InIterator start, InIterator end) : ConcurrentVector(startSize, ReserveTag) { size_.store(startSize, std::memory_order_relaxed); assert(std::distance(start, end) == static_cast(startSize)); internalInit(start, end, begin()); } /** * Construct via initializer list **/ ConcurrentVector(std::initializer_list l) : ConcurrentVector(l.size(), std::begin(l), std::end(l)) {} /** * Copy constructor **/ ConcurrentVector(const ConcurrentVector& other) : ConcurrentVector(other.size(), other.cbegin(), other.cend()) {} /** * Move constructor **/ ConcurrentVector(ConcurrentVector&& other) : buffers_(std::move(other.buffers_)), firstBucketShift_(other.firstBucketShift_), firstBucketLen_(other.firstBucketLen_), size_(other.size_.load(std::memory_order_relaxed)) { moveCachedPtrsFrom(other); other.size_.store(0, std::memory_order_relaxed); // This is possibly unnecessary overhead, but enables the "other" vector to be in a valid, // usable state right away, no empty check or clear required, as it is for std::vector. T* firstTwo = cv::alloc(2 * firstBucketLen_); other.setCachedPtr(0, firstTwo); other.setCachedPtr(1, firstTwo + firstBucketLen_); other.buffers_[0].store(firstTwo, std::memory_order_relaxed); other.buffers_[1].store(firstTwo + firstBucketLen_, std::memory_order_relaxed); } /** * Copy assignment operator. This is not concurrency safe. **/ ConcurrentVector& operator=(const ConcurrentVector& other) { if (&other == this) { return *this; } clear(); reserve(other.size()); size_.store(other.size(), std::memory_order_relaxed); internalInit(other.cbegin(), other.cend(), begin()); return *this; } /** * Move assignment operator. This is not concurrency safe. **/ ConcurrentVector& operator=(ConcurrentVector&& other) { using std::swap; if (&other == this) { return *this; } clear(); swap(firstBucketShift_, other.firstBucketShift_); swap(firstBucketLen_, other.firstBucketLen_); buffers_ = std::move(other.buffers_); swapCachedPtrs(other); size_t curLen = size_.load(std::memory_order_relaxed); size_.store(other.size_.load(std::memory_order_relaxed), std::memory_order_relaxed); other.size_.store(curLen, std::memory_order_relaxed); return *this; } /** * Assign the vector. Not concurrency safe. * * @param count The number of elements to have in the vector * @param value The value to copy into each element **/ void assign(size_type count, const T& value) { clear(); reserve(count); size_.store(count, std::memory_order_relaxed); internalFillN(begin(), count, value); } /** * Assign the vector. Not concurrency safe. * * @param start The beginning of the iterator range to assign into the vector * @param end The end of the iterator range to assign into the vector **/ template < typename It, typename = typename std::iterator_traits::difference_type, typename = typename std::iterator_traits::pointer, typename = typename std::iterator_traits::reference, typename = typename std::iterator_traits::value_type, typename = typename std::iterator_traits::iterator_category> void assign(It start, It end) { clear(); auto count = std::distance(start, end); reserve(count); size_.store(count, std::memory_order_relaxed); internalInit(start, end, begin()); } /** * Reserve some capacity for the vector. Not concurrency safe. * * @param capacity The amount of space to ensure is available to avoid further allocations. 
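 *
 * For example (a minimal sketch with an illustrative element count):
 *
 *   dispenso::ConcurrentVector<float> vec;
 *   vec.reserve(1 << 20);  // done single-threaded, before any concurrent growth begins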
**/ void reserve(difference_type capacity) { auto bend = bucketAndSubIndex(capacity); allocateBufferRange({0, 0, firstBucketLen_}, capacity, bend); } /** * Resize the vector, using default initialization for any new values. Not concurrency safe. * * @param len The length of the vector after the resize. **/ void resize(difference_type len) { difference_type curLen = static_cast(size_.load(std::memory_order_relaxed)); if (curLen < len) { grow_to_at_least(len); } else if (curLen > len) { auto it = end(); auto newEnd = begin() + len; do { --it; it->~T(); } while (it != newEnd); size_.store(len, std::memory_order_relaxed); } } /** * Resize the vector, copying the provided value into any new elements. Not concurrency safe. * * @param len The length of the vector after the resize. * @param value The value to copy into any new elements. **/ void resize(difference_type len, const T& value) { difference_type curLen = static_cast(size_.load(std::memory_order_relaxed)); if (curLen < len) { grow_to_at_least(len, value); } else if (curLen > len) { auto it = end(); auto newEnd = begin() + len; do { --it; it->~T(); } while (it != newEnd); size_.store(len, std::memory_order_relaxed); } } /** * The default capacity of this vector. * @return The capacity if the vector were cleared and then called shrink_to_fit. **/ size_type default_capacity() const { return 2 * firstBucketLen_; } /** * The current capacity of the vector. Not concurrency safe. * @return The current capacity. **/ size_type capacity() const { size_t cap = 2 * firstBucketLen_; for (size_t b = 2; b < kMaxBuffers; ++b) { if (!buffers_[b].load(std::memory_order_relaxed)) { break; } cap *= 2; } return cap; } /** * Clear the vector. This does not deallocate buffers, but just ensures that all elements are * destructed. Size will be zero after the call. Not concurrency safe. **/ void clear() { auto binfo = bucketAndSubIndex(size_.load(std::memory_order_relaxed)); size_t len = binfo.bucketIndex; size_t cap = binfo.bucketCapacity; size_t b = binfo.bucket; do { T* buf = buffers_[b].load(std::memory_order_relaxed); T* t = buf + len; while (t != buf) { --t; t->~T(); } cap >>= int{b > 1}; len = cap; } while (b--); size_.store(0, std::memory_order_release); } /** * Gets rid of extra capacity that is not needed to maintain preconditions. At least the default * capacity will remain, even if the size of the vector is zero. Not concurrency safe. **/ void shrink_to_fit() { constexpr size_t kMaxExtra = 2; auto binfo = bucketAndSubIndex(size_.load(std::memory_order_relaxed)); // We need to at least skip the first two buckets, since we have those as a single allocation. size_t startBucket = std::max(2, binfo.bucket + kMaxExtra); for (size_t b = startBucket; b < kMaxBuffers; ++b) { T* ptr = buffers_[b].load(std::memory_order_relaxed); if (!ptr) { break; } if (buffers_.shouldDealloc(b)) { cv::dealloc(ptr); } clearCachedPtr(b); buffers_[b].store(nullptr, std::memory_order_release); } } /** * Destruct the vector. **/ ~ConcurrentVector() { clear(); shrink_to_fit(); cv::dealloc(buffers_[0].load(std::memory_order_acquire)); } /** * Insert a value. Not concurrency safe. * @param pos The point of insertion. * @param value The value to copy into the element at pos. * @return The iterator at the inserted position. **/ iterator insert(const_iterator pos, const T& value) { auto it = insertPartial(pos); new (&*it) T(value); return it; } /** * Insert a value. Not concurrency safe. * @param pos The point of insertion. * @param value The value to move into the element at pos. 
* @return The iterator at the inserted position. **/ iterator insert(const_iterator pos, T&& value) { auto it = insertPartial(pos); new (&*it) T(std::move(value)); return it; } /** * Insert a value. Not concurrency safe. * @param pos The point of insertion. * @param count The number of elements to insert. * @param value The value to copy into each inserted element starting at pos. * @return The iterator at the inserted position. **/ iterator insert(const_iterator pos, size_type count, const T& value) { auto it = insertPartial(pos, count); std::fill_n(it, count, value); return it; } /** * Insert a range of values. Not concurrency safe. * @param pos The point of insertion. * @param first The start of the input iterator range. * @param last The end of the input iterator range. * @return The iterator at the inserted position. **/ template < typename InputIt, typename = typename std::iterator_traits::difference_type, typename = typename std::iterator_traits::pointer, typename = typename std::iterator_traits::reference, typename = typename std::iterator_traits::value_type, typename = typename std::iterator_traits::iterator_category> iterator insert(const_iterator pos, InputIt first, InputIt last) { size_t len = std::distance(first, last); auto it = insertPartial(pos, len); std::copy_n(first, len, it); return it; } /** * Insert an initializer_list. Not concurrency safe. * @param pos The point of insertion. * @param ilist The initializer_list. * @return The iterator at the inserted position. **/ iterator insert(const_iterator pos, std::initializer_list ilist) { return insert(pos, ilist.begin(), ilist.end()); } /** * Erase one element. Not concurrency safe. * @param pos The point of erasure. * @return Iterator following the removed element. If pos refers to the last element, then * the end() iterator is returned. **/ iterator erase(const_iterator pos) { auto e = end(); if (e == pos) { return e; } size_.fetch_sub(1, std::memory_order_relaxed); --e; if (e == pos) { e->~T(); return e; } ++e; auto it = begin(); it += (pos - it); return std::move(pos + 1, const_iterator(e), it); } /** * Erase a range of elements. Not concurrency safe. * @param first The starting point of erasure. * @param last The ending point of erasure. * @return Iterator following the last removed element. If pos refers to the last element, then * the end() iterator is returned. If last==end() prior to removal, then the updated end() * iterator is returned. If [first, last) is an empty range, then last is returned. **/ iterator erase(const_iterator first, const_iterator last) { size_t len = std::distance(first, last); auto it = begin(); size_t startIdx = first - it; if (len == 0) { return it + (startIdx + len); } it += startIdx; auto e_it = std::move(last, cend(), it); if (e_it < last) { // remove any values that were not already moved into do { --last; last->~T(); } while (e_it != last); } size_.fetch_sub(len, std::memory_order_relaxed); return e_it; } /** * Push a value onto the end of the vector. Concurrency safe. * @param val The value to copy into the new element. * @return The iterator at the point of insertion. **/ iterator push_back(const T& val) { return emplace_back(val); } /** * Push a value onto the end of the vector. Concurrency safe. * @param val The value to move into the new element. * @return The iterator at the point of insertion. **/ iterator push_back(T&& val) { return emplace_back(std::move(val)); } /** * Push a value onto the end of the vector. Concurrency safe. 
* @param args The arg pack used to construct the new element. * @return The iterator at the point of insertion. **/ template iterator emplace_back(Args&&... args) { auto index = size_.fetch_add(1, std::memory_order_relaxed); auto binfo = bucketAndSubIndex(index); allocateBuffer(binfo); iterator ret{this, index, binfo}; if (Traits::kIteratorPreferSpeed) { new (&*ret) T(std::forward(args)...); } else { new (buffers_[binfo.bucket] + binfo.bucketIndex) T(std::forward(args)...); } return ret; } /** * Grow the vector, constructing new elements via a generator. Concurrency safe. * @param delta The number of elements to grow by. * @param gen The generator to use to construct new elements. Must have operator()() and return a * valid T. * @return The iterator to the start of the grown range. **/ template iterator grow_by_generator(size_type delta, Gen gen) { iterator ret = growByUninitialized(delta); for (auto it = ret; delta--; ++it) { new (&*it) T(gen()); } return ret; } /** * Grow the vector, copying the provided value into new elements. Concurrency safe. * @param delta The number of elements to grow by. * @param t The value to copy into all new elements. * @return The iterator to the start of the grown range. **/ iterator grow_by(size_type delta, const T& t) { iterator ret = growByUninitialized(delta); internalFillN(ret, delta, t); return ret; } /** * Grow the vector, default initializing new elements. Concurrency safe. * @param delta The number of elements to grow by. * @return The iterator to the start of the grown range. **/ iterator grow_by(size_type delta) { iterator ret = growByUninitialized(delta); internalFillDefaultN(ret, delta); return ret; } /** * Grow the vector with an input iterator range. Concurrency safe. * @param start The start of the input iterator range. * @param end The end of the input iterator range. * @return The iterator to the start of the grown range. **/ template < typename It, typename = typename std::iterator_traits::difference_type, typename = typename std::iterator_traits::pointer, typename = typename std::iterator_traits::reference, typename = typename std::iterator_traits::value_type, typename = typename std::iterator_traits::iterator_category> iterator grow_by(It start, It end) { iterator ret = growByUninitialized(std::distance(start, end)); internalInit(start, end, ret); return ret; } /** * Grow the vector with an initializer_list. Concurrency safe. * @param initList The initializer_list to use to initialize the newly added range. * @return The iterator to the start of the grown range. **/ iterator grow_by(std::initializer_list initList) { return grow_by(std::begin(initList), std::end(initList)); } /** * Grow the vector to at least the desired size, default initializing new elements. Concurrency * safe. * @param n The required length * @return The iterator to the nth element. **/ iterator grow_to_at_least(size_type n) { size_t curSize = size_.load(std::memory_order_relaxed); if (curSize < n) { return grow_by(n - curSize); } return {this, n - 1, bucketAndSubIndex(n - 1)}; } /** * Grow the vector to at least the desired size, copying the provided value into new elements. * Concurrency safe. * @param n The required length * @param t The value to copy into each new element. * @return The iterator to the nth element. 
**/ iterator grow_to_at_least(size_type n, const T& t) { size_t curSize = size_.load(std::memory_order_relaxed); if (curSize < n) { return grow_by(n - curSize, t); } return {this, n - 1, bucketAndSubIndex(n - 1)}; } /** * Pop the last element off the vector. Not concurrency safe. **/ void pop_back() { auto binfo = bucketAndSubIndex(size_.fetch_sub(1, std::memory_order_relaxed) - 1); T* elt = buffers_[binfo.bucket].load(std::memory_order_relaxed) + binfo.bucketIndex; elt->~T(); } /** * Access an element of the vector. Concurrency safe. * @param index The index of the element to access. * @return A reference to the element at index. * * @note Iterators and references are stable even if other threads are inserting, but it is * still the users responsibility to avoid racing on the element itself. **/ const T& operator[](size_type index) const { auto binfo = bucketAndSubIndexForIndex(index); T* buf = cachedBuffer(binfo.bucket); return buf[binfo.bucketIndex]; } /** * Access an element of the vector. Concurrency safe. * @param index The index of the element to access. * @return A reference to the element at index. * * @note Iterators and references are stable even if other threads are inserting, but it is * still the users responsibility to avoid racing on the element itself. **/ T& operator[](size_type index) { auto binfo = bucketAndSubIndexForIndex(index); T* buf = cachedBuffer(binfo.bucket); return buf[binfo.bucketIndex]; } /** * Access an element of the vector. Out-of-boundes accesses generate an exception. Concurrency * safe. * @param index The index of the element to access. * @return A reference to the element at index. * * @note Iterators and references are stable even if other threads are inserting, but it is * still the users responsibility to avoid racing on the element itself. **/ const T& at(size_type index) const { #if defined(__cpp_exceptions) if (index >= size_.load(std::memory_order_relaxed)) { throw std::out_of_range("Index too large"); } #endif return operator[](index); } /** * Access an element of the vector. Out-of-boundes accesses generate an exception. Concurrency * safe. * @param index The index of the element to access. * @return A reference to the element at index. * * @note Iterators and references are stable even if other threads are inserting, but it is * still the users responsibility to avoid racing on the element itself. **/ T& at(size_type index) { #if defined(__cpp_exceptions) if (index >= size_.load(std::memory_order_relaxed)) { throw std::out_of_range("Index too large"); } #endif return operator[](index); } /** * Get an iterator to the start of the vector. Concurrency safe. * @return An iterator to the start of the vector. * * @note Iterators and references are stable even if other threads are inserting, but it is * still the users responsibility to avoid racing on the element itself. **/ iterator begin() { return {this, 0, {0, 0, firstBucketLen_}}; } /** * Get an iterator to the end of the vector. Concurrency safe. * @return An iterator to the end of the vector. Note that it is possible for the end to change * concurrently with this call. * * @note Iterators and references are stable even if other threads are inserting, but it is * still the users responsibility to avoid racing on the element itself. **/ iterator end() { size_t curSize = size_.load(std::memory_order_relaxed); auto binfo = bucketAndSubIndex(curSize); return {this, curSize, binfo}; } /** * Get an iterator to the start of the vector. Concurrency safe. 
* @return An iterator to the start of the vector. * * @note Iterators and references are stable even if other threads are inserting, but it is * still the users responsibility to avoid racing on the element itself. **/ const_iterator begin() const { return {this, 0, {0, 0, firstBucketLen_}}; } /** * Get an iterator to the end of the vector. Concurrency safe. * @return An iterator to the end of the vector. Note that it is possible for the end to change * concurrently with this call. * * @note Iterators and references are stable even if other threads are inserting, but it is * still the users responsibility to avoid racing on the element itself. **/ const_iterator end() const { size_t curSize = size_.load(std::memory_order_relaxed); auto binfo = bucketAndSubIndex(curSize); return {this, curSize, binfo}; } /** * Get an iterator to the start of the vector. Concurrency safe. * @return An iterator to the start of the vector. * * @note Iterators and references are stable even if other threads are inserting, but it is * still the users responsibility to avoid racing on the element itself. **/ const_iterator cbegin() const { return {this, 0, {0, 0, firstBucketLen_}}; } /** * Get an iterator to the end of the vector. Concurrency safe. * @return An iterator to the end of the vector. Note that it is possible for the end to change * concurrently with this call. * * @note Iterators and references are stable even if other threads are inserting, but it is * still the users responsibility to avoid racing on the element itself. **/ const_iterator cend() const { size_t curSize = size_.load(std::memory_order_relaxed); auto binfo = bucketAndSubIndex(curSize); return {this, curSize, binfo}; } /** * Get a starting reverse iterator to the vector. Concurrency safe. * @return A starting reverse iterator to the vector. Note that it is possible for the the rbegin * to change concurrently with this call. * * @note Iterators and references are stable even if other threads are inserting, but it is * still the users responsibility to avoid racing on the element itself. **/ reverse_iterator rbegin() { return reverse_iterator(end()); } /** * Get an ending reverse iterator to the vector. Concurrency safe. * @return An ending reverse iterator to the vector. * * @note Iterators and references are stable even if other threads are inserting, but it is * still the users responsibility to avoid racing on the element itself. **/ reverse_iterator rend() { return reverse_iterator(begin()); } /** * Get a starting reverse iterator to the vector. Concurrency safe. * @return A starting reverse iterator to the vector. Note that it is possible for the the rbegin * to change concurrently with this call. * * @note Iterators and references are stable even if other threads are inserting, but it is * still the users responsibility to avoid racing on the element itself. **/ const_reverse_iterator rbegin() const { return const_reverse_iterator(cend()); } /** * Get an ending reverse iterator to the vector. Concurrency safe. * @return An ending reverse iterator to the vector. * * @note Iterators and references are stable even if other threads are inserting, but it is * still the users responsibility to avoid racing on the element itself. **/ const_reverse_iterator rend() const { return const_reverse_iterator(cbegin()); } /** * Checks if the vector contains any elements. Concurrency safe. * @return true if the vector contains no elements, false otherwise. Note that an element could * be appended concurrently with this call. 
**/ bool empty() const { return size_.load(std::memory_order_relaxed) == 0; } /** * Get the maximum size a vector of this type can theoretically have. Concurrency safe * (constexpr). * @return The max size a vector of this type could theoretically have. **/ constexpr size_type max_size() const noexcept { return Traits::kMaxVectorSize; } /** * Get the size of the vector. Concurrency safe. * @return The number of elements in the vector. Note that elements can be appended concurrently * with this call. **/ size_type size() const { return size_.load(std::memory_order_relaxed); } /** * Get the front element of the vector. Concurrency safe. * @return A reference to the first element in the vector. **/ T& front() { return *buffers_[0].load(std::memory_order_relaxed); } /** * Get the front element of the vector. Concurrency safe. * @return A reference to the first element in the vector. **/ const T& front() const { return *buffers_[0].load(std::memory_order_relaxed); } /** * Get the last element of the vector. Concurrency safe. * @return A reference to the last element in the vector. Note that elements could be appended * concurrently with this call (in which case it won't be the back anymore). **/ T& back() { return *(end() - 1); } /** * Get the last element of the vector. Concurrency safe. * @return A reference to the last element in the vector. Note that elements could be appended * concurrently with this call (in which case it won't be the back anymore). **/ const T& back() const { return *(end() - 1); } /** * Swap the contents (and iterators, and element references) of the current vector with oth. * @param oth The vector to swap with. **/ void swap(ConcurrentVector& oth) { using std::swap; swap(firstBucketShift_, oth.firstBucketShift_); swap(firstBucketLen_, oth.firstBucketLen_); size_t othlen = oth.size_.load(std::memory_order_relaxed); oth.size_.store(size_.load(std::memory_order_relaxed), std::memory_order_relaxed); size_.store(othlen, std::memory_order_relaxed); // okay, this relies on the fact that we're essentially swapping in the move operator. buffers_ = std::move(oth.buffers_); swapCachedPtrs(oth); } private: DISPENSO_INLINE cv::BucketInfo bucketAndSubIndexForIndex(size_t index) const { #if defined(_MSC_VER) || defined(__aarch64__) || defined(_M_ARM64) // Branching fast path — benefits MSVC and ARM where branch prediction handles // sequential access patterns well, avoiding log2/division for the common case. if (index < firstBucketLen_) { return {0, index, firstBucketLen_}; } size_t l2idx = detail::log2(index); size_t bucket = (l2idx + 1) - firstBucketShift_; size_t bucketCapacity = size_t{1} << l2idx; size_t bucketIndex = index - bucketCapacity; return {bucket, bucketIndex, bucketCapacity}; #else return bucketAndSubIndex(index); #endif // _MSC_VER || __aarch64__ || _M_ARM64 } DISPENSO_INLINE cv::BucketInfo bucketAndSubIndex(size_t index) const { size_t l2idx = detail::log2(index | 1); size_t bucket = (l2idx + 1) - firstBucketShift_; size_t bucketCapacity = size_t{1} << l2idx; size_t bucketIndex = index - bucketCapacity; bucket = index < firstBucketLen_ ? 0 : bucket; bucketIndex = index < firstBucketLen_ ? index : bucketIndex; bucketCapacity = index < firstBucketLen_ ? 
firstBucketLen_ : bucketCapacity; return {bucket, bucketIndex, bucketCapacity}; } template void internalInit(InIterator start, InIterator end, iterator it) { while (start != end) { new (&*it) T(*start); ++it; ++start; } } void internalFillN(iterator it, size_t len, const T& value) { for (; len--; ++it) { new (&*it) T(value); } } void internalFillDefaultN(iterator it, size_t len) { for (; len--; ++it) { new (&*it) T(); } } iterator growByUninitialized(size_type delta) { auto index = size_.fetch_add(delta, std::memory_order_relaxed); auto binfo = bucketAndSubIndex(index); auto bend = bucketAndSubIndex(index + delta); allocateBufferRange(binfo, delta, bend); return {this, index, binfo}; } iterator insertPartial(const_iterator pos) { auto e = end(); auto index = size_.fetch_add(1, std::memory_order_relaxed); auto binfo = bucketAndSubIndex(index); allocateBuffer(binfo); new (&*e) T(); return std::move_backward(pos, const_iterator(e), e + 1) - 1; } iterator insertPartial(const_iterator pos, size_t len) { auto e = end(); auto index = size_.fetch_add(len, std::memory_order_relaxed); auto binfo = bucketAndSubIndex(index); auto bend = bucketAndSubIndex(index + len); allocateBufferRange(binfo, len, bend); for (auto it = e + len; it != e;) { --it; new (&*it) T(); } return std::move_backward(pos, const_iterator(e), e + len) - len; } alignas(kCacheLineSize) cv::ConVecBuffer< T, SizeTraits::kDefaultCapacity / 2, SizeTraits::kMaxVectorSize, Traits::kPreferBuffersInline, Traits::kReallocStrategy> buffers_; static constexpr size_t kMaxBuffers = detail::log2const(SizeTraits::kMaxVectorSize / (SizeTraits::kDefaultCapacity / 2)) + 1; #if DISPENSO_HAS_CACHED_PTRS // Non-atomic cache of buffer pointers for read-hot paths. Buffer pointers are write-once // (never change after allocation) and are stored here BEFORE the release store to buffers_[], // so any thread that acquires from buffers_[] is guaranteed to see the cached value. // All concurrent writes to a given slot store the same value. // Disabled on ARM where cache-line invalidation pressure exceeds the read-path benefit. alignas(kCacheLineSize) mutable T* cachedPtrs_[kMaxBuffers]; #endif size_t firstBucketShift_; size_t firstBucketLen_; alignas(kCacheLineSize) std::atomic size_{0}; DISPENSO_INLINE T* cachedBuffer(size_t bucket) const { #if DISPENSO_HAS_CACHED_PTRS return cachedPtrs_[bucket]; #else // Relaxed is sufficient: callers only access indices that have been fully constructed, which // requires an external happens-before with the writing thread. That synchronization // transitively carries the buffer pointer visibility (which was release-stored during // allocation). This matches the original pre-cache memory ordering and avoids the cost of // ldar (load-acquire) on AArch64. 
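    // A concrete illustration (an illustrative pattern, not a specific dispenso call site):
    // thread A appends with emplace_back(), which acquire-loads (and, if it allocates,
    // release-stores) buffers_ internally, and then hands the resulting index to thread B
    // through some release/acquire channel of its own. B's acquire also makes the earlier
    // buffer-pointer store visible, so the relaxed load below is safe for that index.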
return buffers_[bucket].load(std::memory_order_relaxed); #endif } DISPENSO_INLINE void allocateBuffer(const cv::BucketInfo& binfo) { #if DISPENSO_HAS_CACHED_PTRS buffers_.allocAsNecessary(binfo, cachedPtrs_); #else buffers_.allocAsNecessary(binfo); #endif } DISPENSO_INLINE void allocateBufferRange(const cv::BucketInfo& binfo, ssize_t rangeLen, const cv::BucketInfo& bend) { #if DISPENSO_HAS_CACHED_PTRS buffers_.allocAsNecessary(binfo, rangeLen, bend, cachedPtrs_); #else buffers_.allocAsNecessary(binfo, rangeLen, bend); #endif } DISPENSO_INLINE void initCachedPtrs() { #if DISPENSO_HAS_CACHED_PTRS for (size_t i = 0; i < kMaxBuffers; ++i) { cachedPtrs_[i] = nullptr; } #endif } DISPENSO_INLINE void setCachedPtr(size_t bucket, T* ptr) { #if DISPENSO_HAS_CACHED_PTRS cachedPtrs_[bucket] = ptr; #else (void)bucket; (void)ptr; #endif } DISPENSO_INLINE void clearCachedPtr(size_t bucket) { #if DISPENSO_HAS_CACHED_PTRS cachedPtrs_[bucket] = nullptr; #else (void)bucket; #endif } DISPENSO_INLINE void moveCachedPtrsFrom(ConcurrentVector& other) { #if DISPENSO_HAS_CACHED_PTRS for (size_t i = 0; i < kMaxBuffers; ++i) { cachedPtrs_[i] = other.cachedPtrs_[i]; other.cachedPtrs_[i] = nullptr; } #else (void)other; #endif } DISPENSO_INLINE void swapCachedPtrs(ConcurrentVector& other) { #if DISPENSO_HAS_CACHED_PTRS for (size_t i = 0; i < kMaxBuffers; ++i) { T* cur = cachedPtrs_[i]; cachedPtrs_[i] = other.cachedPtrs_[i]; other.cachedPtrs_[i] = cur; } #else (void)other; #endif } friend class cv::ConVecIterBase, T>; friend class cv::ConcurrentVectorIterator, T, false>; friend class cv::ConcurrentVectorIterator, T, true>; }; template inline bool operator==( const ConcurrentVector& a, const ConcurrentVector& b) { return a.size() == b.size() && std::equal(a.begin(), a.end(), b.begin()); } template inline bool operator!=( const ConcurrentVector& a, const ConcurrentVector& b) { return !(a == b); } template inline bool operator<( const ConcurrentVector& a, const ConcurrentVector& b) { return std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); } template inline bool operator>( const ConcurrentVector& a, const ConcurrentVector& b) { return b < a; } template inline bool operator<=( const ConcurrentVector& a, const ConcurrentVector& b) { return !(b < a); } template inline bool operator>=( const ConcurrentVector& a, const ConcurrentVector& b) { return !(a < b); } template inline void swap(ConcurrentVector& a, ConcurrentVector& b) { a.swap(b); } // Textual inclusion. Includes undocumented implementation details, e.g. iterators. #include } // namespace dispenso ================================================ FILE: dispenso/detail/can_invoke.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include #include #include namespace dispenso { namespace detail { template using void_t = void; template struct CanInvoke : std::false_type {}; template struct CanInvoke()(std::declval()...))>> : std::true_type {}; } // namespace detail } // namespace dispenso ================================================ FILE: dispenso/detail/completion_event_impl.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
*/ #pragma once #include #include #include // Implementation notes: // 1. You should know what you're doing if you are directly using CompletionEventImpl. Most people // should be using CompletionEvent, which is designed for safe use by folks without strong // knowledge of atomics, etc... // 2. intrusiveStatus may be used to gain access to the internal atomic status // 3. intrusiveStatus may set the status to anything... *but* once wait functions have been called, // if intrusiveStatus sets the status to completedStatus, it should not change the status again. // After notify, the status should not be modified. // 4. It is not an error to set the status to completedStatus, and then call notify with // completedStatus, but it is very likely to be unnecessary. Usually, while there may be threads // racing to get some statuses, and they should use compare_exchange functions to resolve those, // setting completed status should not typically be racy. #include "notifier_common.h" namespace dispenso { namespace detail { #define DISPENSO_COMPLETION_SUPPORTS_TRY_NOTIFY #if defined(__linux__) class CompletionEventImpl { public: CompletionEventImpl(int initStatus) : ftx_(initStatus) {} void notify(int completedStatus) { status_.store(completedStatus, std::memory_order_release); futex(&ftx_, FUTEX_WAKE_PRIVATE, std::numeric_limits::max(), nullptr, nullptr, 0); } void tryNotify() { futex(&ftx_, FUTEX_WAKE_PRIVATE, std::numeric_limits::max(), nullptr, nullptr, 0); } void wait(int completedStatus) const { int current; while ((current = status_.load(std::memory_order_acquire)) != completedStatus) { futex(&ftx_, FUTEX_WAIT_PRIVATE, current, nullptr, nullptr, 0); } } bool waitFor(int completedStatus, const std::chrono::duration& relTime) const { if (status_.load(std::memory_order_acquire) == completedStatus) { return true; } double relSeconds = relTime.count(); if (relSeconds <= 0.0) { return false; } struct timespec ts; ts.tv_sec = static_cast(relSeconds); relSeconds -= static_cast(ts.tv_sec); ts.tv_nsec = static_cast(1e9 * relSeconds); // TODO: determine if we should worry about reducing timeout time subsequent times through the // loop in the case of spurious wake. int current; while ((current = status_.load(std::memory_order_acquire)) != completedStatus) { if (futex(&ftx_, FUTEX_WAIT_PRIVATE, current, &ts, nullptr, 0) && errno == ETIMEDOUT) { // Intentionally not re-checking status: returning false on timeout is consistent with // std::condition_variable::wait_for semantics. The benign race where notify arrives // concurrently with timeout is handled by the caller retrying if needed. return false; } } return true; } template bool waitUntil(int completedStatus, const std::chrono::time_point& absTime) const { if (status_.load(std::memory_order_acquire) == completedStatus) { return true; } return waitFor(completedStatus, absTime - Clock::now()); } std::atomic& intrusiveStatus() { return status_; } const std::atomic& intrusiveStatus() const { return status_; } private: union { mutable int ftx_; std::atomic status_; }; }; #elif defined(__MACH__) && defined(DISPENSO_HAS_OS_SYNC) // Uses a 64-bit combined {status, waiterCount} to eliminate the Dekker race between notify() and // wait(). notify() atomically sets status while observing waiterCount via CAS; wait() atomically // increments waiterCount while observing status via CAS. The 8-byte futex compare detects changes // to either field. 
intrusiveStatus() returns a reference to the status portion (lower 32 bits on // little-endian) for direct CAS/fetch_sub by FutureImplBase and Latch. class CompletionEventImpl { public: CompletionEventImpl(int initStatus) : combined_(static_cast(static_cast(initStatus))) {} void notify(int completedStatus) { uint64_t old = combined_.load(std::memory_order_relaxed); uint64_t newVal; do { newVal = (old & kWaiterMask) | static_cast(completedStatus); } while (!combined_.compare_exchange_weak( old, newVal, std::memory_order_release, std::memory_order_relaxed)); if (old & kWaiterMask) { mac_futex_wake_all(&combined_, sizeof(uint64_t)); } } void tryNotify() { if (combined_.load(std::memory_order_acquire) & kWaiterMask) { mac_futex_wake_all(&combined_, sizeof(uint64_t)); } } void wait(int completedStatus) const { uint64_t val = combined_.load(std::memory_order_acquire); while (static_cast(val) != completedStatus) { uint64_t newVal = val + kOneWaiter; if (!combined_.compare_exchange_weak( val, newVal, std::memory_order_acq_rel, std::memory_order_acquire)) { continue; } mac_futex_wait(&combined_, newVal, sizeof(uint64_t)); combined_.fetch_sub(kOneWaiter, std::memory_order_acq_rel); val = combined_.load(std::memory_order_acquire); } } bool waitFor(int completedStatus, const std::chrono::duration& relTime) const { if (static_cast(combined_.load(std::memory_order_acquire)) == completedStatus) { return true; } double relSeconds = relTime.count(); if (relSeconds <= 0.0) { return false; } uint64_t relTimeUs = static_cast(relSeconds * 1e6); // TODO: determine if we should worry about reducing timeout time subsequent times through the // loop in the case of spurious wake. uint64_t val = combined_.load(std::memory_order_acquire); while (static_cast(val) != completedStatus) { uint64_t newVal = val + kOneWaiter; if (!combined_.compare_exchange_weak( val, newVal, std::memory_order_acq_rel, std::memory_order_acquire)) { continue; } int ret = mac_futex_wait_for(&combined_, newVal, sizeof(uint64_t), relTimeUs); combined_.fetch_sub(kOneWaiter, std::memory_order_acq_rel); if (ret && errno == ETIMEDOUT) { // Intentionally not re-checking status: returning false on timeout is consistent with // std::condition_variable::wait_for semantics. The benign race where notify arrives // concurrently with timeout is handled by the caller retrying if needed. 
return false; } val = combined_.load(std::memory_order_acquire); } return true; } template bool waitUntil(int completedStatus, const std::chrono::time_point& absTime) const { if (static_cast(combined_.load(std::memory_order_acquire)) == completedStatus) { return true; } return waitFor(completedStatus, absTime - Clock::now()); } std::atomic& intrusiveStatus() { return parts_[0]; } const std::atomic& intrusiveStatus() const { return parts_[0]; } private: static constexpr uint64_t kOneWaiter = 1ULL << 32; static constexpr uint64_t kWaiterMask = ~0xFFFFFFFFULL; union { mutable std::atomic combined_; mutable std::atomic parts_[2]; // [0] = status (little-endian) }; }; #elif defined(_WIN32) class CompletionEventImpl { public: CompletionEventImpl(int initStatus) : status_(initStatus) {} void notify(int completedStatus) { status_.store(completedStatus, std::memory_order_release); WakeByAddressAll(&status_); } void tryNotify() { WakeByAddressAll(&status_); } void wait(int completedStatus) const { int current; while ((current = status_.load(std::memory_order_acquire)) != completedStatus) { WaitOnAddress(&status_, ¤t, sizeof(int), kInfiniteWin); } } bool waitFor(int completedStatus, const std::chrono::duration& relTime) const { if (status_.load(std::memory_order_acquire) == completedStatus) { return true; } double relSeconds = relTime.count(); if (relSeconds <= 0.0) { return false; } int msWait = std::max(1, static_cast(relSeconds * 1000.0)); // TODO: determine if we should worry about reducing timeout time subsequent times through the // loop in the case of spurious wake. int current; while ((current = status_.load(std::memory_order_acquire)) != completedStatus) { if (!WaitOnAddress(&status_, ¤t, sizeof(int), msWait) && GetLastError() == kErrorTimeoutWin) { // Intentionally not re-checking status: returning false on timeout is consistent with // std::condition_variable::wait_for semantics. The benign race where notify arrives // concurrently with timeout is handled by the caller retrying if needed. return false; } } return true; } template bool waitUntil(int completedStatus, const std::chrono::time_point& absTime) const { if (status_.load(std::memory_order_acquire) == completedStatus) { return true; } return waitFor(completedStatus, absTime - Clock::now()); } std::atomic& intrusiveStatus() { return status_; } const std::atomic& intrusiveStatus() const { return status_; } private: mutable std::atomic status_; }; #else #undef DISPENSO_COMPLETION_SUPPORTS_TRY_NOTIFY // Fallback C++11 implementation. class CompletionEventImpl { public: CompletionEventImpl(int initStatus) : status_(initStatus) {} void notify(int completedStatus) { // See https://en.cppreference.com/w/cpp/thread/condition_variable // "Even if the shared variable is atomic, it must be modified under the mutex in order to // correctly publish the modification to the waiting thread." 
{ std::lock_guard lk(mtx_); status_.store(completedStatus, std::memory_order_release); } cv_.notify_all(); } void wait(int completedStatus) const { if (status_.load(std::memory_order_acquire) != completedStatus) { std::unique_lock lk(mtx_); cv_.wait(lk, [this, completedStatus]() { return status_.load(std::memory_order_relaxed) == completedStatus; }); } } template bool waitFor(int completedStatus, const std::chrono::duration& relTime) const { if (status_.load(std::memory_order_acquire) == completedStatus) { return true; } std::unique_lock lk(mtx_); return cv_.wait_for(lk, relTime, [this, completedStatus] { return status_.load(std::memory_order_relaxed) == completedStatus; }); } template bool waitUntil(int completedStatus, const std::chrono::time_point& absTime) const { if (status_.load(std::memory_order_acquire) == completedStatus) { return true; } std::unique_lock lk(mtx_); return cv_.wait_until(lk, absTime, [this, completedStatus] { return status_.load(std::memory_order_relaxed) == completedStatus; }); } std::atomic& intrusiveStatus() { return status_; } const std::atomic& intrusiveStatus() const { return status_; } private: mutable std::mutex mtx_; mutable std::condition_variable cv_; std::atomic status_; }; #endif // platform } // namespace detail } // namespace dispenso ================================================ FILE: dispenso/detail/concurrent_vector_impl.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ // This file intended for textual inclusion into concurrent_vector.h only namespace cv { struct BucketInfo { size_t bucket; size_t bucketIndex; size_t bucketCapacity; }; template class ConVecIterBase { public: using iterator_category = std::random_access_iterator_tag; using difference_type = ssize_t; using value_type = T; ConVecIterBase() = default; ConVecIterBase(const VecT* vec, BucketInfo info); protected: ConVecIterBase(uintptr_t vb, T* bucketStart, T* bucketPtr, T* bucketEnd) : vb_(vb), bucketStart_(bucketStart), bucketPtr_(bucketPtr), bucketEnd_(bucketEnd) {} struct VecAndBucket { const VecT* vec; const size_t bucket; }; VecAndBucket getVecAndBucket() const { static_assert( alignof(VecT) >= 64, "This code relies on the ConcurrentVector pointer to be 64-byte aligned"); if (sizeof(uintptr_t) == 8) { return {reinterpret_cast(vb_ & 0xffffffffffffffc0UL), vb_ & 0x3f}; } else { return {reinterpret_cast(vb_ & 0xffffffc0), vb_ & 0x3f}; } } // Note: Could have a size-optimized iterator via traits. It is easy to reduce this to 24 bytes // on 64-bit platform, but this will result in a large enough performance hit that storing an // index is preferable (see CompactCVecIterator). 16 bytes on 32-bit platform. 
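  // vb_ packs the owning ConcurrentVector pointer and the bucket index into one word: the
  // vector is at least 64-byte aligned (see the static_assert in getVecAndBucket()), so the
  // low 6 bits are free to hold the bucket, and getVecAndBucket() unpacks both with masks.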
alignas(4 * sizeof(uintptr_t)) uintptr_t vb_; T* bucketStart_; T* bucketPtr_; T* bucketEnd_; private: friend bool operator!=(const ConVecIterBase& a, const ConVecIterBase& b) { return a.bucketPtr_ != b.bucketPtr_; } friend bool operator==(const ConVecIterBase& a, const ConVecIterBase& b) { return a.bucketPtr_ == b.bucketPtr_; } friend bool operator<(const ConVecIterBase& a, const ConVecIterBase& b) { return a.vb_ < b.vb_ || (a.vb_ == b.vb_ && a.bucketPtr_ < b.bucketPtr_); } friend bool operator<=(const ConVecIterBase& a, const ConVecIterBase& b) { return a.vb_ < b.vb_ || (a.vb_ == b.vb_ && a.bucketPtr_ <= b.bucketPtr_); } friend bool operator>(const ConVecIterBase& a, const ConVecIterBase& b) { return a.vb_ > b.vb_ || (a.vb_ == b.vb_ && a.bucketPtr_ > b.bucketPtr_); } friend bool operator>=(const ConVecIterBase& a, const ConVecIterBase& b) { return a.vb_ > b.vb_ || (a.vb_ == b.vb_ && a.bucketPtr_ >= b.bucketPtr_); } friend difference_type operator-(const ConVecIterBase& a, const ConVecIterBase& b) { size_t abucket = a.vb_ & 0x3f; size_t bbucket = b.vb_ & 0x3f; if (abucket == bbucket) { return a.bucketPtr_ - b.bucketPtr_; } ssize_t aLeft = a.bucketPtr_ - a.bucketStart_; ssize_t bLeft = b.bucketPtr_ - b.bucketStart_; aLeft += (bool)abucket * (a.bucketEnd_ - a.bucketStart_); bLeft += (bool)bbucket * (b.bucketEnd_ - b.bucketStart_); return aLeft - bLeft; } }; template class ConcurrentVectorIterator : public ConVecIterBase { public: using iterator_category = typename ConVecIterBase::iterator_category; using difference_type = typename ConVecIterBase::difference_type; using value_type = typename ConVecIterBase::value_type; using pointer = std::conditional_t, T*>; using reference = std::conditional_t, T&>; ConcurrentVectorIterator() = default; ConcurrentVectorIterator(const VecT* vec, BucketInfo info) : ConVecIterBase(vec, info) {} ConcurrentVectorIterator(const VecT* vec, size_t /*index*/, BucketInfo info) : ConcurrentVectorIterator(vec, info) {} template > ConcurrentVectorIterator(const ConcurrentVectorIterator& other) : ConVecIterBase(other) {} reference operator*() const; pointer operator->() const; reference operator[](difference_type n) const; ConcurrentVectorIterator& operator++(); ConcurrentVectorIterator operator++(int) { auto result = *this; operator++(); return result; } ConcurrentVectorIterator& operator--(); ConcurrentVectorIterator operator--(int) { auto result = *this; operator--(); return result; } ConcurrentVectorIterator& operator+=(difference_type n); ConcurrentVectorIterator& operator-=(difference_type n) { return operator+=(-n); } ConcurrentVectorIterator operator+(difference_type n) const; ConcurrentVectorIterator operator-(difference_type n) const { return operator+(-n); } private: ConcurrentVectorIterator(uintptr_t vb, T* bucketStart, T* bucketPtr, T* bucketEnd) : ConVecIterBase(vb, bucketStart, bucketPtr, bucketEnd) {} using ConVecIterBase::getVecAndBucket; using ConVecIterBase::vb_; using ConVecIterBase::bucketStart_; using ConVecIterBase::bucketPtr_; using ConVecIterBase::bucketEnd_; }; template class CompactCVecIterBase { public: using iterator_category = std::random_access_iterator_tag; using difference_type = ssize_t; using value_type = T; CompactCVecIterBase() = default; CompactCVecIterBase(const VecT* vec, size_t index) : vec_(vec), index_(index) {} protected: // 16 bytes on 64-bit platform, 8 bytes on 32-bit platform. 
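  // CompactCVecIterator keeps only the vector pointer and a flat element index; each
  // dereference re-derives the bucket via ConcurrentVector::operator[], trading a little
  // per-access work for the smaller footprint noted above.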
const VecT* vec_; size_t index_; private: friend bool operator!=(const CompactCVecIterBase& a, const CompactCVecIterBase& b) { return a.index_ != b.index_; } friend bool operator==(const CompactCVecIterBase& a, const CompactCVecIterBase& b) { return a.index_ == b.index_; } friend bool operator<(const CompactCVecIterBase& a, const CompactCVecIterBase& b) { return a.index_ < b.index_; } friend bool operator<=(const CompactCVecIterBase& a, const CompactCVecIterBase& b) { return a.index_ <= b.index_; } friend bool operator>(const CompactCVecIterBase& a, const CompactCVecIterBase& b) { return a.index_ > b.index_; } friend bool operator>=(const CompactCVecIterBase& a, const CompactCVecIterBase& b) { return a.index_ >= b.index_; } friend difference_type operator-(const CompactCVecIterBase& a, const CompactCVecIterBase& b) { return a.index_ - b.index_; } }; template class CompactCVecIterator : public CompactCVecIterBase { public: using difference_type = ssize_t; using value_type = T; using pointer = std::conditional_t, T*>; using reference = std::conditional_t, T&>; using iterator_category = std::random_access_iterator_tag; CompactCVecIterator() = default; CompactCVecIterator(const VecT* vec, size_t index, BucketInfo) : CompactCVecIterator(vec, index) {} CompactCVecIterator(const VecT* vec, size_t index) : CompactCVecIterBase(vec, index) {} template > CompactCVecIterator(const CompactCVecIterator& other) : CompactCVecIterBase(other) {} reference operator*() const; pointer operator->() const; reference operator[](difference_type n) const; CompactCVecIterator& operator++() { ++index_; return *this; } CompactCVecIterator operator++(int) { auto result = *this; operator++(); return result; } CompactCVecIterator& operator--() { --index_; return *this; } CompactCVecIterator operator--(int) { auto result = *this; operator--(); return result; } CompactCVecIterator& operator+=(difference_type n) { index_ += n; return *this; } CompactCVecIterator& operator-=(difference_type n) { return operator+=(-n); } CompactCVecIterator operator+(difference_type n) const { return {vec_, index_ + n}; } CompactCVecIterator operator-(difference_type n) const { return operator+(-n); } private: using CompactCVecIterBase::vec_; using CompactCVecIterBase::index_; }; template inline T* alloc(size_t elts) { return reinterpret_cast(detail::alignedMalloc(elts * sizeof(T), alignof(T))); } template inline void dealloc(T* p) { detail::alignedFree(p); } template class ConVecBufferBase {}; template class ConVecBufferBase { public: static constexpr size_t kMaxBuffers = detail::log2const(kMaxVectorSize / kMinBufferSize) + 1; ConVecBufferBase() : buffers_(alloc>(kMaxBuffers)) { for (size_t i = 0; i < kMaxBuffers; ++i) { buffers_[i].store(nullptr, std::memory_order_relaxed); } } ~ConVecBufferBase() { dealloc>(buffers_); } ConVecBufferBase(ConVecBufferBase&& other) : buffers_(std::exchange(other.buffers_, alloc>(kMaxBuffers))) { for (size_t i = 0; i < kMaxBuffers; ++i) { other.buffers_[i].store(nullptr, std::memory_order_relaxed); } } ConVecBufferBase& operator=(ConVecBufferBase&& other) { using std::swap; swap(other.buffers_, buffers_); return *this; } protected: detail::AlignedAtomic* buffers_; }; template class ConVecBufferBase { public: static constexpr size_t kMaxBuffers = detail::log2const(kMaxVectorSize / kMinBufferSize) + 1; ConVecBufferBase() { for (size_t i = 0; i < kMaxBuffers; ++i) { buffers_[i].store(nullptr, std::memory_order_relaxed); } } ConVecBufferBase(ConVecBufferBase&& other) { for (size_t i = 0; i < kMaxBuffers; ++i) { 
buffers_[i].store( other.buffers_[i].load(std::memory_order_relaxed), std::memory_order_relaxed); other.buffers_[i].store(nullptr, std::memory_order_relaxed); } } ConVecBufferBase& operator=(ConVecBufferBase&& other) { for (size_t i = 0; i < kMaxBuffers; ++i) { T* cur = buffers_[i].load(std::memory_order_relaxed); T* oth = other.buffers_[i].load(std::memory_order_relaxed); buffers_[i].store(oth, std::memory_order_relaxed); other.buffers_[i].store(cur, std::memory_order_relaxed); } return *this; } protected: detail::AlignedAtomic buffers_[kMaxBuffers]; }; template < typename T, size_t kMinBufferSize, size_t kMaxVectorSize, bool kMakeInline, ConcurrentVectorReallocStrategy kStrategy> class ConVecBuffer : public ConVecBufferBase { public: ConVecBuffer() { std::memset(shouldDealloc_, 0, BaseType::kMaxBuffers * sizeof(bool)); } ConVecBuffer(ConVecBuffer&& other) : BaseType(std::move(other)) { std::memcpy(shouldDealloc_, other.shouldDealloc_, BaseType::kMaxBuffers * sizeof(bool)); std::memset(other.shouldDealloc_, 0, BaseType::kMaxBuffers * sizeof(bool)); } ConVecBuffer& operator=(ConVecBuffer&& other) { if (&other != this) { BaseType::operator=(std::move(other)); std::swap_ranges( other.shouldDealloc_, other.shouldDealloc_ + BaseType::kMaxBuffers, shouldDealloc_); } return *this; } detail::AlignedAtomic& operator[](size_t bucket) { return this->buffers_[bucket]; } const detail::AlignedAtomic& operator[](size_t bucket) const { return this->buffers_[bucket]; } void allocAsNecessary(const BucketInfo& binfo) { allocAsNecessaryImpl(binfo, [](size_t, T*) {}); } void allocAsNecessary(const BucketInfo& binfo, T** cachedPtrs) { allocAsNecessaryImpl(binfo, [cachedPtrs](size_t b, T* p) { cachedPtrs[b] = p; }); } void allocAsNecessary(const BucketInfo& binfo, ssize_t rangeLen, const BucketInfo& bend) { allocAsNecessaryImpl(binfo, rangeLen, bend, [](size_t, T*) {}); } void allocAsNecessary( const BucketInfo& binfo, ssize_t rangeLen, const BucketInfo& bend, T** cachedPtrs) { allocAsNecessaryImpl( binfo, rangeLen, bend, [cachedPtrs](size_t b, T* p) { cachedPtrs[b] = p; }); } bool shouldDealloc(size_t bucket) const { return shouldDealloc_[bucket]; } private: using BaseType = ConVecBufferBase; DISPENSO_INLINE static size_t allocCheckIndex(size_t bucketCapacity) { return (kStrategy == ConcurrentVectorReallocStrategy::kFullBufferAhead) ? 0 : (kStrategy == ConcurrentVectorReallocStrategy::kHalfBufferAhead ? 
bucketCapacity / 2 : bucketCapacity - 1); } template void allocAsNecessaryImpl(const BucketInfo& binfo, CacheUpdate&& cacheUpdate) { if (DISPENSO_EXPECT(binfo.bucketIndex == allocCheckIndex(binfo.bucketCapacity), 0)) { if (!this->buffers_[binfo.bucket + 1].load(std::memory_order_acquire)) { T* newBuf = cv::alloc(binfo.bucketCapacity << 1); cacheUpdate(binfo.bucket + 1, newBuf); this->buffers_[binfo.bucket + 1].store(newBuf, std::memory_order_release); shouldDealloc_[binfo.bucket + 1] = true; } } while (DISPENSO_EXPECT(!this->buffers_[binfo.bucket].load(std::memory_order_acquire), 0)) { } } template DISPENSO_INLINE bool tryAssignBuffer( size_t bucket, T*& allocBufs, size_t cap, bool firstAccounted, CacheUpdate&& cacheUpdate) { if (!this->buffers_[bucket].load(std::memory_order_acquire)) { cacheUpdate(bucket, allocBufs); this->buffers_[bucket].store(allocBufs, std::memory_order_release); allocBufs += cap; shouldDealloc_[bucket] = !firstAccounted; return true; } return false; } template void allocAsNecessaryImpl( const BucketInfo& binfo, ssize_t rangeLen, const BucketInfo& bend, CacheUpdate&& cacheUpdate) { const size_t indexToCheck = allocCheckIndex(binfo.bucketCapacity); bool allocCurrentBucket = binfo.bucketIndex <= indexToCheck && binfo.bucketIndex + rangeLen > indexToCheck; if (DISPENSO_EXPECT(allocCurrentBucket || binfo.bucket < bend.bucket, 0)) { size_t sizeToAlloc = 0; size_t cap = binfo.bucketCapacity << ((bool)binfo.bucket + !allocCurrentBucket); size_t bucket = binfo.bucket + 1 + !allocCurrentBucket; for (; bucket <= bend.bucket; ++bucket, cap <<= 1) { if (!this->buffers_[bucket].load(std::memory_order_acquire)) { sizeToAlloc += cap; } } assert(bucket == bend.bucket + 1); assert((bucket == 1 && cap == bend.bucketCapacity) || cap == bend.bucketCapacity * 2); const size_t endToCheck = allocCheckIndex(bend.bucketCapacity); if (DISPENSO_EXPECT(bend.bucketIndex > endToCheck, 0)) { if (!this->buffers_[bucket].load(std::memory_order_acquire)) { sizeToAlloc += cap; } } T* allocBufs = nullptr; if (sizeToAlloc) { allocBufs = cv::alloc(sizeToAlloc); } bool firstAccounted = false; cap = binfo.bucketCapacity << ((bool)binfo.bucket + !allocCurrentBucket); bucket = binfo.bucket + 1 + !allocCurrentBucket; for (; bucket <= bend.bucket; ++bucket, cap <<= 1) { firstAccounted |= tryAssignBuffer(bucket, allocBufs, cap, firstAccounted, cacheUpdate); } assert(bucket == bend.bucket + 1); assert((bucket == 1 && cap == bend.bucketCapacity) || cap == bend.bucketCapacity * 2); if (DISPENSO_EXPECT(bend.bucketIndex > endToCheck, 0)) { tryAssignBuffer(bucket, allocBufs, cap, firstAccounted, cacheUpdate); } } for (size_t bucket = binfo.bucket; bucket <= bend.bucket; ++bucket) { while (DISPENSO_EXPECT(!this->buffers_[bucket].load(std::memory_order_acquire), 0)) { #if defined(DISPENSO_HAS_TSAN) std::this_thread::sleep_for(std::chrono::microseconds(1)); #endif // DISPENSO_HAS_TSAN } } } bool shouldDealloc_[BaseType::kMaxBuffers]; }; } // namespace cv ================================================ FILE: dispenso/detail/concurrent_vector_impl2.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
*/ // This file intended for textual inclusion into concurrent_vector.h only namespace cv { template DISPENSO_INLINE ConVecIterBase::ConVecIterBase(const VecT* vec, cv::BucketInfo info) : vb_(reinterpret_cast(vec) | info.bucket), bucketStart_(vec->buffers_[info.bucket].load(std::memory_order_relaxed)), bucketPtr_(bucketStart_ + info.bucketIndex), bucketEnd_(bucketStart_ + info.bucketCapacity) {} template DISPENSO_INLINE ConcurrentVectorIterator& ConcurrentVectorIterator::operator++() { ++bucketPtr_; if (bucketPtr_ == bucketEnd_) { auto len = bucketEnd_ - bucketStart_; ++vb_; auto vb = getVecAndBucket(); len <<= int{vb.bucket > 1}; bucketPtr_ = bucketStart_ = vb.vec->cachedBuffer(vb.bucket); bucketEnd_ = bucketPtr_ + len; } return *this; } template DISPENSO_INLINE ConcurrentVectorIterator& ConcurrentVectorIterator::operator--() { --bucketPtr_; if (bucketPtr_ < bucketStart_) { auto vb = getVecAndBucket(); if (vb.bucket) { auto len = bucketEnd_ - bucketStart_; --vb_; len >>= int{vb.bucket > 1}; bucketStart_ = vb.vec->cachedBuffer(vb.bucket - 1); bucketPtr_ = bucketStart_ + len; bucketEnd_ = bucketPtr_; --bucketPtr_; } } return *this; } template DISPENSO_INLINE typename ConcurrentVectorIterator::reference ConcurrentVectorIterator::operator*() const { return *bucketPtr_; } template DISPENSO_INLINE typename ConcurrentVectorIterator::pointer ConcurrentVectorIterator::operator->() const { return &operator*(); } template DISPENSO_INLINE typename ConcurrentVectorIterator::reference ConcurrentVectorIterator::operator[](difference_type n) const { T* nPtr = bucketPtr_ + n; if (nPtr >= bucketStart_ && nPtr < bucketEnd_) { return *nPtr; } auto vb = getVecAndBucket(); // Reconstruct index ssize_t oldIndex = bucketPtr_ - bucketStart_; oldIndex += (bool)vb.bucket * (bucketEnd_ - bucketStart_); auto binfo = vb.vec->bucketAndSubIndexForIndex(oldIndex + n); return *(vb.vec->cachedBuffer(binfo.bucket) + binfo.bucketIndex); } template DISPENSO_INLINE ConcurrentVectorIterator& ConcurrentVectorIterator::operator+=(difference_type n) { T* nPtr = bucketPtr_ + n; if (nPtr >= bucketStart_ && nPtr < bucketEnd_) { bucketPtr_ = nPtr; return *this; } auto vb = getVecAndBucket(); // Reconstruct index ssize_t oldIndex = bucketPtr_ - bucketStart_; oldIndex += (bool)vb.bucket * (bucketEnd_ - bucketStart_); auto binfo = vb.vec->bucketAndSubIndexForIndex(oldIndex + n); bucketStart_ = vb.vec->cachedBuffer(binfo.bucket); bucketEnd_ = bucketStart_ + binfo.bucketCapacity; bucketPtr_ = bucketStart_ + binfo.bucketIndex; vb_ = reinterpret_cast(vb.vec) | binfo.bucket; return *this; } template DISPENSO_INLINE ConcurrentVectorIterator ConcurrentVectorIterator::operator+(difference_type n) const { T* nPtr = bucketPtr_ + n; if (nPtr >= bucketStart_ && nPtr < bucketEnd_) { return {vb_, bucketStart_, nPtr, bucketEnd_}; } auto vb = getVecAndBucket(); // Reconstruct index ssize_t oldIndex = bucketPtr_ - bucketStart_; oldIndex += (bool)vb.bucket * (bucketEnd_ - bucketStart_); auto binfo = vb.vec->bucketAndSubIndexForIndex(oldIndex + n); return {vb.vec, binfo}; } template DISPENSO_INLINE typename CompactCVecIterator::reference CompactCVecIterator::operator*() const { return const_cast(*vec_)[index_]; } template DISPENSO_INLINE typename CompactCVecIterator::pointer CompactCVecIterator::operator->() const { return &operator*(); } template DISPENSO_INLINE typename CompactCVecIterator::reference CompactCVecIterator::operator[](ssize_t n) const { return const_cast(*vec_)[index_ + n]; } } // namespace cv 
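// Illustrative usage sketch (not part of the library source; in a real program this would
// live in user code, not in this header). The hypothetical function below exercises only the
// public ConcurrentVector API documented in concurrent_vector.h: concurrency-safe growth from
// several threads, followed by single-threaded use of the non-concurrency-safe operations.

#include <dispenso/concurrent_vector.h>
#include <thread>
#include <vector>

inline void concurrentVectorUsageSketch() {
  dispenso::ConcurrentVector<int> vec;
  std::vector<std::thread> threads;
  for (int t = 0; t < 4; ++t) {
    threads.emplace_back([&vec, t]() {
      for (int i = 0; i < 1000; ++i) {
        // push_back/emplace_back/grow_by are concurrency safe; existing iterators and
        // references remain stable while other threads append.
        vec.push_back(t * 1000 + i);
      }
    });
  }
  for (auto& th : threads) {
    th.join();
  }
  // Once the appending threads have joined, non-concurrency-safe operations (resize, erase,
  // assign, shrink_to_fit, ...) may be used freely from a single thread.
  vec.resize(static_cast<int>(vec.size() / 2));
}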
================================================ FILE: dispenso/detail/epoch_waiter.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include #include #include #include "notifier_common.h" namespace dispenso { namespace detail { #if defined(__linux__) class EpochWaiter { public: EpochWaiter() : ftx_(0) {} void bumpAndWake() { epoch_.fetch_add(1, std::memory_order_acq_rel); futex(&ftx_, FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0); } void bumpAndWakeAll() { epoch_.fetch_add(1, std::memory_order_acq_rel); futex(&ftx_, FUTEX_WAKE_PRIVATE, std::numeric_limits::max(), nullptr, nullptr, 0); } void bumpAndWakeN(int n, int totalWaiters) { epoch_.fetch_add(1, std::memory_order_acq_rel); (void)totalWaiters; futex(&ftx_, FUTEX_WAKE_PRIVATE, n, nullptr, nullptr, 0); } uint32_t wait(uint32_t expectedEpoch) const { uint32_t current; // allow spurious wakeups if ((current = epoch_.load(std::memory_order_acquire)) == expectedEpoch) { futex(&ftx_, FUTEX_WAIT_PRIVATE, expectedEpoch, nullptr, nullptr, 0); } else { return current; } return epoch_.load(std::memory_order_acquire); } uint32_t current() const { return epoch_.load(std::memory_order_acquire); } uint32_t waitFor(uint32_t expectedEpoch, uint64_t relTimeUs) const { uint32_t current; if ((current = epoch_.load(std::memory_order_acquire)) != expectedEpoch) { return current; } struct timespec ts; ts.tv_sec = static_cast(relTimeUs / 1000000); ts.tv_nsec = static_cast((relTimeUs - (ts.tv_sec * 1000000)) * 1000); // allow spurious wakeups if ((current = epoch_.load(std::memory_order_acquire)) == expectedEpoch) { futex(&ftx_, FUTEX_WAIT_PRIVATE, current, &ts, nullptr, 0); } else { return current; } return epoch_.load(std::memory_order_acquire); } private: union { mutable int ftx_; std::atomic epoch_; }; }; #elif defined(__MACH__) && defined(DISPENSO_HAS_MAC_FUTEX) class EpochWaiter { public: EpochWaiter() : epoch_(0) {} void bumpAndWake() { epoch_.fetch_add(1, std::memory_order_acq_rel); mac_futex_wake_one(&epoch_, sizeof(uint32_t)); } void bumpAndWakeAll() { epoch_.fetch_add(1, std::memory_order_acq_rel); mac_futex_wake_all(&epoch_, sizeof(uint32_t)); } void bumpAndWakeN(int n, int totalWaiters) { epoch_.fetch_add(1, std::memory_order_acq_rel); if (n >= totalWaiters) { mac_futex_wake_all(&epoch_, sizeof(uint32_t)); } else { for (int i = 0; i < n; ++i) { mac_futex_wake_one(&epoch_, sizeof(uint32_t)); } } } uint32_t wait(uint32_t expectedEpoch) const { uint32_t current; // Allow spurious wake if ((current = epoch_.load(std::memory_order_acquire)) == expectedEpoch) { mac_futex_wait(&epoch_, expectedEpoch, sizeof(uint32_t)); } else { return current; } return epoch_.load(std::memory_order_acquire); } uint32_t current() const { return epoch_.load(std::memory_order_acquire); } uint32_t waitFor(uint32_t expectedEpoch, uint64_t relTimeUs) const { uint32_t current; if ((current = epoch_.load(std::memory_order_acquire)) != expectedEpoch) { return current; } // Allow spurious wake if ((current = epoch_.load(std::memory_order_acquire)) == expectedEpoch) { mac_futex_wait_for(&epoch_, current, sizeof(uint32_t), relTimeUs); } else { return current; } return epoch_.load(std::memory_order_acquire); } private: mutable std::atomic epoch_; }; #elif defined(_WIN32) class EpochWaiter { public: EpochWaiter() : epoch_(0) {} void bumpAndWake() { epoch_.fetch_add(1, std::memory_order_acq_rel); // 
Always WakeAll on Windows. WakeByAddressSingle and WakeByAddressAll // have comparable per-call kernel cost, but WakeAll keeps more threads // in their spin phase (kBackoffYield iterations) where they can pick up // subsequent work without another kernel wake transition. WakeByAddressAll(&epoch_); } void bumpAndWakeAll() { epoch_.fetch_add(1, std::memory_order_acq_rel); WakeByAddressAll(&epoch_); } void bumpAndWakeN(int /*n*/, int totalWaiters) { epoch_.fetch_add(1, std::memory_order_acq_rel); // Only use WakeByAddressSingle when there is exactly one waiter — // it avoids waking threads that aren't waiting. Otherwise, WakeAll // is preferred: same single kernel transition, and keeps threads // spinning where they can absorb follow-up work without re-waking. if (totalWaiters <= 1) { WakeByAddressSingle(&epoch_); } else { WakeByAddressAll(&epoch_); } } uint32_t wait(uint32_t expectedEpoch) const { uint32_t current; // Allow spurious wake if ((current = epoch_.load(std::memory_order_acquire)) == expectedEpoch) { WaitOnAddress(&epoch_, ¤t, sizeof(uint32_t), kInfiniteWin); } else { return current; } return epoch_.load(std::memory_order_acquire); } uint32_t current() const { return epoch_.load(std::memory_order_acquire); } uint32_t waitFor(uint32_t expectedEpoch, uint64_t relTimeUs) const { uint32_t current; if ((current = epoch_.load(std::memory_order_acquire)) != expectedEpoch) { return current; } // WaitOnAddress takes DWORD (unsigned long) milliseconds. // Clamp to the largest finite timeout to avoid overflow or // accidentally passing INFINITE (0xFFFFFFFF = wait forever). uint64_t msVal = std::max(uint64_t{1}, relTimeUs / 1000); unsigned long msWait = msVal >= kInfiniteWin ? static_cast(kInfiniteWin - 1) : static_cast(msVal); // Allow spurious wake if ((current = epoch_.load(std::memory_order_acquire)) == expectedEpoch) { WaitOnAddress(&epoch_, ¤t, sizeof(uint32_t), msWait); } else { return current; } return epoch_.load(std::memory_order_acquire); } private: mutable std::atomic epoch_; }; #else // Fallback C++11 implementation. 
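// The portable fallback below keeps the same contract as the futex/WaitOnAddress variants
// above: wait(expectedEpoch) may return spuriously and simply reports the epoch it currently
// observes, so callers follow the usual eventcount pattern (illustrative sketch assuming a
// caller-owned predicate; not a specific dispenso call site):
//
//   uint32_t epoch = waiter.current();
//   while (!predicate()) {
//     epoch = waiter.wait(epoch); // sleeps only while the epoch still equals `epoch`
//   }
//
// Producers update the shared state first and then call bumpAndWake()/bumpAndWakeAll(), whose
// fetch_add(acq_rel) provides the release half of the synchronization.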
class EpochWaiter { public: EpochWaiter() : epoch_(0) {} void bumpAndWake() { epoch_.fetch_add(1, std::memory_order_acq_rel); cv_.notify_one(); } void bumpAndWakeAll() { epoch_.fetch_add(1, std::memory_order_acq_rel); cv_.notify_all(); } void bumpAndWakeN(int n, int totalWaiters) { epoch_.fetch_add(1, std::memory_order_acq_rel); if (n >= totalWaiters) { cv_.notify_all(); } else { for (int i = 0; i < n; ++i) { cv_.notify_one(); } } } uint32_t wait(uint32_t expectedEpoch) const { uint32_t current; // Allow spurious wake if ((current = epoch_.load(std::memory_order_acquire)) == expectedEpoch) { std::unique_lock lk(mtx_); cv_.wait(lk); } else { return current; } return epoch_.load(std::memory_order_acquire); } uint32_t current() const { return epoch_.load(std::memory_order_acquire); } uint32_t waitFor(uint32_t expectedEpoch, uint64_t relTimeUs) const { uint32_t current; if ((current = epoch_.load(std::memory_order_acquire)) != expectedEpoch) { return current; } // Allow spurious wake if ((current = epoch_.load(std::memory_order_acquire)) == expectedEpoch) { std::unique_lock lk(mtx_); cv_.wait_for(lk, std::chrono::microseconds(static_cast(relTimeUs))); } else { return current; } return epoch_.load(std::memory_order_acquire); } private: mutable std::mutex mtx_; mutable std::condition_variable cv_; std::atomic epoch_; }; #endif // platform } // namespace detail } // namespace dispenso ================================================ FILE: dispenso/detail/future_impl.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include #include #include #include namespace dispenso { namespace detail { template class FutureImplResultMember { public: Result& result() const { #if defined(__cpp_exceptions) if (exception_) { std::rethrow_exception(exception_); } #endif // __cpp_exceptions return *reinterpret_cast(resultBuf_); } ~FutureImplResultMember() { #if defined(__cpp_exceptions) // We don't want to call the destructor if we never set the result. if (exception_) { return; } #endif // __cpp_exceptions reinterpret_cast(resultBuf_)->~Result(); } protected: template void runToResult(Func&& f) { #if defined(__cpp_exceptions) try { new (resultBuf_) Result(f()); } catch (...) { exception_ = std::current_exception(); } #else new (resultBuf_) Result(f()); #endif // __cpp_exceptions } template void setAsResult(T&& t) { new (resultBuf_) Result(std::forward(t)); } alignas(Result) mutable char resultBuf_[sizeof(Result)]; // Always present to ensure stable ABI layout regardless of __cpp_exceptions. std::exception_ptr exception_; }; template class FutureImplResultMember { protected: template void runToResult(Func&& f) { #if defined(__cpp_exceptions) try { result_ = &f(); } catch (...) { exception_ = std::current_exception(); } #else result_ = &f(); #endif // __cpp_exceptions } Result& result() const { #if defined(__cpp_exceptions) if (exception_) { std::rethrow_exception(exception_); } #endif // __cpp_exceptions return *result_; } void setAsResult(Result* res) { result_ = res; } Result* result_; // Always present to ensure stable ABI layout regardless of __cpp_exceptions. std::exception_ptr exception_; }; template <> class FutureImplResultMember { protected: template void runToResult(Func&& f) { #if defined(__cpp_exceptions) try { f(); } catch (...) 
{ exception_ = std::current_exception(); } #else f(); #endif // __cpp_exceptions } void result() const { #if defined(__cpp_exceptions) if (exception_) { std::rethrow_exception(exception_); } #endif // __cpp_exceptions } void setAsResult() const {} // Always present to ensure stable ABI layout regardless of __cpp_exceptions. std::exception_ptr exception_; }; template class FutureBase; template class FutureImplBase : private FutureImplResultMember, public OnceCallable { public: enum Status { kNotStarted, kRunning, kReady }; using FutureImplResultMember::result; using FutureImplResultMember::setAsResult; using FutureImplResultMember::runToResult; bool ready() { return status_.intrusiveStatus().load(std::memory_order_acquire) == kReady; } void run() override { (void)run(kNotStarted); decRefCountMaybeDestroy(); } void wait() { if (waitCommon(true)) { return; } status_.wait(kReady); } template std::future_status waitFor(const std::chrono::duration& timeoutDuration) { if (waitCommon(allowInline_) || status_.waitFor(kReady, timeoutDuration)) { return std::future_status::ready; } return std::future_status::timeout; } template std::future_status waitUntil(const std::chrono::time_point& timeoutTime) { if (waitCommon(allowInline_) || status_.waitUntil(kReady, timeoutTime)) { return std::future_status::ready; } return std::future_status::timeout; } void incRefCount() { refCount_.fetch_add(1, std::memory_order_acquire); } void decRefCountMaybeDestroy() { DISPENSO_TSAN_ANNOTATE_HAPPENS_BEFORE(&refCount_); if (refCount_.fetch_sub(1, std::memory_order_release) == 1) { DISPENSO_TSAN_ANNOTATE_HAPPENS_AFTER(&refCount_); dealloc(); } } void setReady() { status_.intrusiveStatus().store(kReady, std::memory_order_release); refCount_.store(1, std::memory_order_release); } void setAllowInline(bool allow) { allowInline_ = allow; } void setTaskSetCounter(std::atomic* tsc) { taskSetCounter_ = tsc; } protected: bool run(int s) { while (s == kNotStarted) { if (status_.intrusiveStatus().compare_exchange_weak(s, kRunning, std::memory_order_acq_rel)) { runFunc(); status_.notify(kReady); if (taskSetCounter_) { // If we want TaskSet::wait to imply Future::is_ready(), // we need to signal that *after* setting the Future status to ready. taskSetCounter_->fetch_sub(1, std::memory_order_release); } tryExecuteThenChain(); return true; } } return false; } using ThenChainInvoke = void(void*, void*); struct ThenChain { ThenChain* next; void* impl; void* schedulable; ThenChainInvoke* invoke; ThenChain* scheduleDestroyAndGetNext() { invoke(impl, schedulable); constexpr size_t kImplSize = static_cast(nextPow2(sizeof(this))); auto* ret = this->next; deallocSmallBuffer(this); return ret; } }; void tryExecuteThenChain() { ThenChain* head = thenChain_.load(std::memory_order_acquire); // While the chain contains anything, let's try to get it and dispatch the chain. while (head) { if (thenChain_.compare_exchange_weak(head, nullptr, std::memory_order_acq_rel)) { // Managed to exchange with head, value of thenChain_ now points to null chain. // Head points to the implicit list of items to be executed. while (head) { head = head->scheduleDestroyAndGetNext(); } // At this point, the list is exhausted, so we exit the outer loop too. It would be valid // to get head again and try all over again, but it is guaranteed that if another link was // added concurrently to our dispatch, that thread will attempt to dispatch it's own chain, // so we just exit the loop instead. 
} } } template static void thenChainInvokeAsync(void* implv, void* schedulablev) { SomeFutureImpl* impl = reinterpret_cast(implv); Schedulable* schedulable = reinterpret_cast(schedulablev); schedulable->schedule(OnceFunction(impl, true), ForceQueuingTag()); } template static void thenChainInvoke(void* implv, void* schedulablev) { SomeFutureImpl* impl = reinterpret_cast(implv); Schedulable* schedulable = reinterpret_cast(schedulablev); schedulable->schedule(OnceFunction(impl, true)); } template void addToThenChainOrExecute(SomeFutureImpl* impl, Schedulable& sched, std::launch asyncPolicy) { if (status_.intrusiveStatus().load(std::memory_order_acquire) == kReady) { if ((asyncPolicy & std::launch::async) == std::launch::async) { sched.schedule(OnceFunction(impl, true), ForceQueuingTag()); } else { sched.schedule(OnceFunction(impl, true)); } return; } constexpr size_t kImplSize = static_cast(nextPow2(sizeof(ThenChain))); auto* buffer = allocSmallBuffer(); ThenChain* link = reinterpret_cast(buffer); link->impl = impl; using NonConstSchedulable = std::remove_const_t; NonConstSchedulable* nonConstSched = const_cast(&sched); link->schedulable = nonConstSched; if ((asyncPolicy & std::launch::async) == std::launch::async) { link->invoke = thenChainInvokeAsync; } else { link->invoke = thenChainInvoke; } link->next = thenChain_.load(std::memory_order_acquire); while (!thenChain_.compare_exchange_weak(link->next, link, std::memory_order_acq_rel)) { } // Okay, one last thing. It is possible that we added to the thenChain just after // tryExecuteThenChain was called from run(). We still need to ensure that this work is kicked // off, so just double check here, and execute if that may have happened. if (status_.intrusiveStatus().load(std::memory_order_acquire) == kReady) { tryExecuteThenChain(); } } inline bool waitCommon(bool allowInline) { int s = status_.intrusiveStatus().load(std::memory_order_acquire); return s == kReady || (allowInline && run(s)); } virtual void runFunc() = 0; virtual void dealloc() = 0; // Futures always run to completion (no cancellation), so destroyOnly() is unreachable. 
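Stepping back from the class internals: stripped of the future-specific bookkeeping, addToThenChainOrExecute and tryExecuteThenChain reduce to a small lock-free pattern in which producers CAS new links onto an intrusive singly linked list and the completing thread claims the whole list at once and dispatches it. A distilled sketch of that pattern, using simplified stand-in types rather than the real ThenChain/FutureImpl machinery:

#include <atomic>

struct Link {
  Link* next;
  void (*invoke)(Link*);
};

std::atomic<Link*> gHead{nullptr};

// Producer side (cf. addToThenChainOrExecute): publish one continuation.
void push(Link* link) {
  link->next = gHead.load(std::memory_order_acquire);
  while (!gHead.compare_exchange_weak(link->next, link, std::memory_order_acq_rel)) {
  }
}

// Consumer side (cf. tryExecuteThenChain): claim the whole chain, then run it.
// The real code claims via a CAS loop that swaps in nullptr; exchange expresses the same idea.
void drain() {
  Link* claimed = gHead.exchange(nullptr, std::memory_order_acq_rel);
  while (claimed) {
    Link* next = claimed->next;
    claimed->invoke(claimed);
    claimed = next;
  }
}

The race that the comments above call out, a push landing just after a drain has emptied the list, is handled by making the pusher re-check readiness and run the drain itself, so a late link is never stranded.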
void destroyOnly() override { assert(false && "destroyOnly() should never be called on a FutureImpl"); } ~FutureImplBase() override = default; bool allowInline_; CompletionEventImpl status_{kNotStarted}; std::atomic refCount_{2}; std::atomic* taskSetCounter_{nullptr}; std::atomic thenChain_{nullptr}; template friend FutureImplBase* createValueFutureImplReady(std::reference_wrapper t); template friend FutureImplBase* createRefFutureImplReady(T&& t); template friend class FutureBase; }; // namespace detail template class FutureImplSmall : public FutureImplBase { public: FutureImplSmall(F&& f) { new (func_) F(std::move(f)); } protected: void runFunc() override { F* f = reinterpret_cast(func_); this->runToResult(*f); f->~F(); } void dealloc() override { this->~FutureImplSmall(); deallocSmallBuffer(this); } ~FutureImplSmall() override {} private: alignas(F) char func_[sizeof(F)]; }; template class FutureImplSmall : public FutureImplBase { protected: void runFunc() override {} void dealloc() override { this->~FutureImplSmall(); deallocSmallBuffer(this); } ~FutureImplSmall() override {} }; template class FutureImplAlloc : public FutureImplBase { public: FutureImplAlloc(F&& f) { new (func_) F(std::move(f)); } protected: void runFunc() override { F* f = reinterpret_cast(func_); this->runToResult(*f); f->~F(); } void dealloc() override { this->~FutureImplAlloc(); alignedFree(this); } ~FutureImplAlloc() override {} private: alignas(F) char func_[sizeof(F)]; }; template inline FutureImplBase* createFutureImpl(F&& f, bool allowInline, std::atomic* taskSetCounter) { using FNoRef = typename std::remove_reference::type; constexpr size_t kImplSize = static_cast(nextPow2(sizeof(FutureImplSmall<16, FNoRef, Result>))); using SmallT = FutureImplSmall; FutureImplBase* ret = new (allocSmallBuffer()) SmallT(std::forward(f)); ret->setAllowInline(allowInline); ret->setTaskSetCounter(taskSetCounter); return ret; } template inline FutureImplBase* createValueFutureImplReady(T&& t) { constexpr size_t kImplSize = static_cast(nextPow2(sizeof(FutureImplSmall<16, void, Result>))); using SmallT = FutureImplSmall; FutureImplBase* ret = new (allocSmallBuffer()) SmallT(); ret->setAsResult(std::forward(t)); ret->setReady(); return ret; } template inline FutureImplBase* createRefFutureImplReady(std::reference_wrapper x) { constexpr size_t kImplSize = static_cast(nextPow2(sizeof(FutureImplSmall<16, void, X&>))); using SmallT = FutureImplSmall; FutureImplBase* ret = new (allocSmallBuffer()) SmallT(); ret->setAsResult(&x.get()); ret->setReady(); return ret; } inline FutureImplBase* createVoidFutureImplReady() { constexpr size_t kImplSize = static_cast(nextPow2(sizeof(FutureImplSmall<16, void, void>))); using SmallT = FutureImplSmall; FutureImplBase* ret = new (allocSmallBuffer()) SmallT(); ret->setReady(); return ret; } template struct TaskSetInterceptionInvoker { TaskSetInterceptionInvoker(TaskSetType& ts) : taskSet(ts) {} void schedule(OnceFunction f) { savedOffFn = std::move(f); } void schedule(OnceFunction f, ForceQueuingTag) { savedOffFn = std::move(f); } TaskSetType& taskSet; OnceFunction savedOffFn; }; template class FutureBase { protected: FutureBase() noexcept : impl_(nullptr) {} FutureBase(FutureBase&& f) noexcept : impl_(f.impl_) { f.impl_ = nullptr; } FutureBase(const FutureBase& f) noexcept { impl_ = f.impl_; if (impl_) { impl_->incRefCount(); } } template FutureBase(F&& f, Schedulable& schedulable, std::launch asyncPolicy, std::launch deferredPolicy) : impl_( createFutureImpl( std::forward(f), (deferredPolicy & 
std::launch::deferred) == std::launch::deferred, nullptr)) { if ((asyncPolicy & std::launch::async) == std::launch::async) { schedulable.schedule(OnceFunction(impl_, true), ForceQueuingTag()); } else { schedulable.schedule(OnceFunction(impl_, true)); } } template FutureBase(F&& f, TaskSet& taskSet, std::launch asyncPolicy, std::launch deferredPolicy) : impl_( createFutureImpl( std::forward(f), (deferredPolicy & std::launch::deferred) == std::launch::deferred, &taskSet.outstandingTaskCount_)) { taskSet.outstandingTaskCount_.fetch_add(1, std::memory_order_acquire); if ((asyncPolicy & std::launch::async) == std::launch::async) { taskSet.pool().schedule(OnceFunction(impl_, true), ForceQueuingTag()); } else { taskSet.pool().schedule(OnceFunction(impl_, true)); } } template FutureBase(F&& f, ConcurrentTaskSet& taskSet, std::launch asyncPolicy, std::launch deferredPolicy) : impl_( createFutureImpl( std::forward(f), (deferredPolicy & std::launch::deferred) == std::launch::deferred, &taskSet.outstandingTaskCount_)) { taskSet.outstandingTaskCount_.fetch_add(1, std::memory_order_acquire); if ((asyncPolicy & std::launch::async) == std::launch::async) { taskSet.pool().schedule(OnceFunction(impl_, true), ForceQueuingTag()); } else { taskSet.pool().schedule(OnceFunction(impl_, true)); } } template FutureBase( F&& f, TaskSetInterceptionInvoker& invoker, std::launch asyncPolicy, std::launch deferredPolicy) : impl_( createFutureImpl( std::forward(f), (deferredPolicy & std::launch::deferred) == std::launch::deferred, &invoker.taskSet.outstandingTaskCount_)) { invoker.taskSet.outstandingTaskCount_.fetch_add(1, std::memory_order_acquire); if ((asyncPolicy & std::launch::async) == std::launch::async) { invoker.schedule(OnceFunction(impl_, true), ForceQueuingTag()); } else { invoker.schedule(OnceFunction(impl_, true)); } } void move(FutureBase&& f) noexcept { if (impl_ == f.impl_) { return; } else if (impl_) { impl_->decRefCountMaybeDestroy(); } impl_ = f.impl_; f.impl_ = nullptr; } void copy(const FutureBase& f) { if (impl_ != f.impl_) { if (impl_ != nullptr) { impl_->decRefCountMaybeDestroy(); } impl_ = f.impl_; if (impl_) { impl_->incRefCount(); } } } ~FutureBase() { if (impl_) { impl_->decRefCountMaybeDestroy(); } } bool valid() const noexcept { return impl_; } bool is_ready() const { assertValid(); return impl_->ready(); } void wait() const { assertValid(); impl_->wait(); } template std::future_status wait_for(const std::chrono::duration& timeoutDuration) const { assertValid(); return impl_->waitFor(timeoutDuration); } template std::future_status wait_until(const std::chrono::time_point& timeoutTime) const { assertValid(); return impl_->waitUntil(timeoutTime); } template FutureImplBase* thenImpl(F&& f, Schedulable& sched, std::launch asyncPolicy, std::launch deferredPolicy); template FutureImplBase* thenImpl(F&& f, TaskSet& sched, std::launch asyncPolicy, std::launch deferredPolicy); template FutureImplBase* thenImpl(F&& f, ConcurrentTaskSet& sched, std::launch asyncPolicy, std::launch deferredPolicy); #if defined DISPENSO_DEBUG void assertValid() const { assert(valid()); } #else void assertValid() const {} #endif // DISPENSO_DEBUG mutable FutureImplBase* impl_; }; struct ReadyTag {}; } // namespace detail } // namespace dispenso ================================================ FILE: dispenso/detail/future_impl2.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. 
* * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ namespace dispenso { namespace detail { // Implementation note regarding then(): // // At some point we tried to add Future::then()&&. The impetus was that we figured we could avoid // making an extra copy of the *this, avoiding primarily an atomic reference increment and // decrement. This lead to a 15% speedup in benchmarks. The problem was that we were retaining a // copy of the bare pointer impl_ in order to call addToThenChainOrExecute. This could lead to a // case where our impl_ only had one reference going into addToThenChainOrExecute, and internal to // that, the func is scheduled, which creates a race between the execution and potential cleanup of // the future internals, against completion of addToThenChainOrExecute. // // In reality this leads to *requiring* a bump of the reference count prior to // addToThenChainOrExecute, and a decrement afterwards. This isn't at all cheaper (in benchmarks) // than just making the copy. The moral of the story? Don't bother with Future::then()&&. template template FutureImplBase* FutureBase::thenImpl( F&& f, Schedulable& sched, std::launch asyncPolicy, std::launch deferredPolicy) { Future copy(*this); auto func = [f = std::move(f), copy = std::move(copy)]() mutable -> RetResult { copy.wait(); return f(std::move(copy)); }; auto* retImpl = createFutureImpl( std::move(func), (deferredPolicy & std::launch::deferred) == std::launch::deferred, nullptr); impl_->addToThenChainOrExecute(retImpl, sched, asyncPolicy); return retImpl; } template template FutureImplBase* FutureBase::thenImpl( F&& f, TaskSet& sched, std::launch asyncPolicy, std::launch deferredPolicy) { Future copy(*this); auto func = [f = std::move(f), copy = std::move(copy)]() mutable -> RetResult { copy.wait(); return f(std::move(copy)); }; sched.outstandingTaskCount_.fetch_add(1, std::memory_order_acquire); auto* retImpl = createFutureImpl( std::move(func), (deferredPolicy & std::launch::deferred) == std::launch::deferred, &sched.outstandingTaskCount_); impl_->addToThenChainOrExecute(retImpl, sched.pool(), asyncPolicy); return retImpl; } template template FutureImplBase* FutureBase::thenImpl( F&& f, ConcurrentTaskSet& sched, std::launch asyncPolicy, std::launch deferredPolicy) { Future copy(*this); auto func = [f = std::move(f), copy = std::move(copy)]() mutable -> RetResult { copy.wait(); return f(std::move(copy)); }; sched.outstandingTaskCount_.fetch_add(1, std::memory_order_acquire); auto* retImpl = createFutureImpl( std::move(func), (deferredPolicy & std::launch::deferred) == std::launch::deferred, &sched.outstandingTaskCount_); impl_->addToThenChainOrExecute(retImpl, sched.pool(), asyncPolicy); return retImpl; } template struct ForEachApply { template void operator()(std::tuple& t, F f) { if (f(std::get(t))) { ForEachApply{}(t, f); } } }; template struct ForEachApply { template void operator()(std::tuple& t, F f) { f(std::get<0>(t)); } }; template void forEach(std::tuple& t, F f) { constexpr size_t size = std::tuple_size>::value; ForEachApply{}(t, f); } template struct WhenAllSharedVec { VecType vec; std::atomic count; OnceFunction f; template ::value_type> WhenAllSharedVec(InputIt first, InputIt last) : vec(first, last), count(vec.size()) {} }; template struct WhenAllSharedTuple { Tuple tuple; std::atomic count; OnceFunction f; template WhenAllSharedTuple(Types&&... 
args) : tuple(std::make_tuple(std::forward(args)...)), count(std::tuple_size::value) {} }; struct InterceptionInvoker { void schedule(OnceFunction f) { savedOffFn = std::move(f); } void schedule(OnceFunction f, ForceQueuingTag) { savedOffFn = std::move(f); } OnceFunction savedOffFn; }; template auto whenAllTuple(Invoker& invoker, Futures&&... futures) -> Future...>> { using TupleType = std::tuple...>; using ResultFuture = Future; // TODO(bbudge): Can write something faster than make_shared using SmallBufferAllocator. auto shared = std::make_shared>(std::forward(futures)...); auto whenComplete = [shared]() -> TupleType { forEach(shared->tuple, [&shared](auto& future) { if (0 == shared->count.load(std::memory_order_acquire)) { return false; } future.wait(); return true; }); return std::move(shared->tuple); }; ResultFuture res(std::move(whenComplete), invoker); shared->f = std::move(invoker.savedOffFn); // Avoid sequencing issue by getting the reference prior to the std::move. auto& tuple = shared->tuple; forEach(tuple, [shared = std::move(shared)](auto& future) { future.then( [shared](auto&&) { if (shared->count.fetch_sub(1, std::memory_order_release) == 1) { shared->f(); } }, kImmediateInvoker); return true; }); return res; } template Future::value_type>> whenAllIterators(Invoker& invoker, InputIt first, InputIt last) { using VecType = std::vector::value_type>; using ResultFuture = Future; if (first == last) { return make_ready_future(VecType()); } // TODO(bbudge): Can write something faster than make_shared using SmallBufferAllocator. auto shared = std::make_shared>(first, last); auto whenComplete = [shared]() -> VecType { for (auto& f : shared->vec) { if (0 == shared->count.load(std::memory_order_acquire)) { break; } f.wait(); } return std::move(shared->vec); }; ResultFuture res(std::move(whenComplete), invoker); shared->f = std::move(invoker.savedOffFn); for (auto& s : shared->vec) { s.then( [shared](auto&&) { if (shared->count.fetch_sub(1, std::memory_order_release) == 1) { shared->f(); } }, kImmediateInvoker); } return res; } } // namespace detail template Future::value_type>> when_all( InputIt first, InputIt last) { detail::InterceptionInvoker interceptor; return whenAllIterators(interceptor, first, last); } template Future::value_type>> when_all(TaskSet& taskSet, InputIt first, InputIt last) { detail::TaskSetInterceptionInvoker interceptor(taskSet); return whenAllIterators(interceptor, first, last); } template Future::value_type>> when_all(ConcurrentTaskSet& taskSet, InputIt first, InputIt last) { detail::TaskSetInterceptionInvoker interceptor(taskSet); return whenAllIterators(interceptor, first, last); } inline auto when_all() -> Future> { return make_ready_future(std::tuple<>()); } inline auto when_all(TaskSet&) -> Future> { return make_ready_future(std::tuple<>()); } inline auto when_all(ConcurrentTaskSet&) -> Future> { return make_ready_future(std::tuple<>()); } template auto when_all(Futures&&... futures) -> Future...>> { detail::InterceptionInvoker interceptor; return whenAllTuple(interceptor, std::forward(futures)...); } template auto when_all(TaskSet& taskSet, Futures&&... futures) -> Future...>> { detail::TaskSetInterceptionInvoker interceptor(taskSet); return whenAllTuple(interceptor, std::forward(futures)...); } template auto when_all(ConcurrentTaskSet& taskSet, Futures&&... 
futures) -> Future...>> { detail::TaskSetInterceptionInvoker interceptor(taskSet); return whenAllTuple(interceptor, std::forward(futures)...); } } // namespace dispenso ================================================ FILE: dispenso/detail/graph_executor_impl.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #include #include #include #include namespace detail { // Maximum recursion depth for inline graph node scheduling before forcing work // through the thread pool queue. Prevents unbounded stack growth when CTS inlines // evaluateNodeConcurrently calls for additional ready dependents. static constexpr int kMaxGraphInlineDepth = 32; class ExecutorBase { protected: inline static bool hasNoIncompletePredecessors(const dispenso::Node& node) { return node.numIncompletePredecessors_.load(std::memory_order_relaxed) == 0; } inline static void addIncompletePredecessor(const dispenso::Node& node) { if (node.isCompleted()) { node.numIncompletePredecessors_.store(1, std::memory_order_relaxed); } else { node.numIncompletePredecessors_.fetch_add(1, std::memory_order_relaxed); } } inline static void ifIncompleteAddIncompletePredecessor(const dispenso::Node& node) { if (!node.isCompleted()) { node.numIncompletePredecessors_.fetch_add(1, std::memory_order_relaxed); } } inline static bool decNumIncompletePredecessors( const dispenso::Node& node, std::memory_order order) { return node.numIncompletePredecessors_.fetch_sub(1, order) == 1; } inline static bool decNumIncompletePredecessors( const dispenso::BiPropNode& node, std::memory_order order) { const std::memory_order loadOrder = order == std::memory_order_relaxed ? std::memory_order_relaxed : std::memory_order_acquire; if (node.numIncompletePredecessors_.load(loadOrder) == dispenso::Node::kCompleted) { return false; } return node.numIncompletePredecessors_.fetch_sub(1, order) == 1; } template inline static void evaluateNodeConcurrently(dispenso::ConcurrentTaskSet& tasks, const N* node) { // Track recursion depth to prevent stack overflow. CTS may inline scheduled // lambdas when load is high, causing evaluateNodeConcurrently to recurse // through the schedule→inline→evaluate path. When depth exceeds the limit, // force work through the pool queue to bound stack growth. 
static DISPENSO_THREAD_LOCAL int depth = 0; struct DepthGuard { DISPENSO_INLINE DepthGuard(int& d) : d_(d) { ++d_; } DISPENSO_INLINE ~DepthGuard() { --d_; } int& d_; }; DepthGuard dGuard(depth); // Process nodes in a loop, continuing inline with first ready dependent // to avoid task scheduling overhead on the critical path while (node != nullptr) { node->run(); const N* inlineNext = nullptr; for (const dispenso::Node* const d : node->dependents_) { if (decNumIncompletePredecessors(static_cast(*d), std::memory_order_acq_rel)) { const N* dep = static_cast(d); if (inlineNext == nullptr) { // First ready dependent: continue with it inline inlineNext = dep; } else if (depth < kMaxGraphInlineDepth) { // Additional ready dependents: schedule to task queue tasks.schedule([&tasks, dep]() { evaluateNodeConcurrently(tasks, dep); }); } else { // Depth limit reached: force enqueue to prevent stack overflow tasks.schedule( [&tasks, dep]() { evaluateNodeConcurrently(tasks, dep); }, dispenso::ForceQueuingTag()); } } } node = inlineNext; } } static void appendGroup( const dispenso::Node* /* node */, std::unordered_set*>& /* groups */) {} static void appendGroup( const dispenso::BiPropNode* node, std::unordered_set*>& groups) { const std::vector* group = node->biPropSet_.get(); if (group != nullptr) { groups.insert(group); } } }; } // namespace detail ================================================ FILE: dispenso/detail/math.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #include #include #if defined(_WIN32) #include #endif //_WIN32 namespace dispenso { namespace detail { constexpr uint64_t nextPow2(uint64_t v) { // https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 v--; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; v |= v >> 32; v++; return v; } constexpr inline uint32_t log2const(uint64_t v) { constexpr uint64_t b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000, 0xFFFFFFFF00000000UL}; constexpr uint32_t S[] = {1, 2, 4, 8, 16, 32}; uint32_t r = 0; for (uint32_t i = 6; i--;) { if (v & b[i]) { v >>= S[i]; r |= S[i]; } } return r; } constexpr inline uint32_t log2const(uint32_t v) { constexpr uint32_t b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000}; constexpr uint32_t S[] = {1, 2, 4, 8, 16}; uint32_t r = 0; for (uint32_t i = 5; i--;) { if (v & b[i]) { v >>= S[i]; r |= S[i]; } } return r; } // On some platforms (e.g. macOS ARM64), size_t is 'unsigned long' which is a distinct type // from both uint32_t and uint64_t, causing overload ambiguity. This template resolves it // by delegating to the 64-bit overload; it is SFINAE-disabled on platforms where unsigned // long already matches one of the fixed-width types (Linux, Windows), so no behavior change. 
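As a quick illustration of the helpers above (and of why the unsigned long forwarding overload that follows is convenient for size_t callers), the constexpr versions can be exercised directly at compile time. These asserts are illustrative additions, not part of the file:

#include <cstdint>
#include <dispenso/detail/math.h> // include path assumed

static_assert(dispenso::detail::nextPow2(uint64_t{17}) == 32, "rounds up to the next power of two");
static_assert(dispenso::detail::nextPow2(uint64_t{32}) == 32, "powers of two map to themselves");
static_assert(dispenso::detail::log2const(uint64_t{1} << 40) == 40, "floor log2 of a 64-bit value");
static_assert(dispenso::detail::log2const(uint32_t{48}) == 5, "floor log2 of a 32-bit value (48 -> 5)");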
template < typename T = unsigned long, typename std::enable_if< !std::is_same::value && !std::is_same::value, int>::type = 0> constexpr inline uint32_t log2const(T v) { return log2const(static_cast(v)); } // --- 64-bit log2 --- #if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__) inline uint32_t log2(uint64_t v) { uint64_t result; __asm__("bsrq %1, %0" : "=r"(result) : "r"(v)); return static_cast(result); } #elif (defined(__GNUC__) || defined(__clang__)) inline uint32_t log2(uint64_t v) { return static_cast(63 - __builtin_clzll(v)); } #elif defined(_WIN64) inline uint32_t log2(uint64_t v) { unsigned long index; _BitScanReverse64(&index, v); return static_cast(index); } #elif defined(_WIN32) inline uint32_t log2(uint64_t v) { unsigned long index; uint32_t hi = static_cast(v >> 32); if (hi != 0) { _BitScanReverse(&index, hi); return static_cast(index + 32); } _BitScanReverse(&index, static_cast(v)); return static_cast(index); } #else inline uint32_t log2(uint64_t v) { return log2const(v); } #endif // 64-bit template < typename T = unsigned long, typename std::enable_if< !std::is_same::value && !std::is_same::value, int>::type = 0> inline uint32_t log2(T v) { return log2(static_cast(v)); } // --- 32-bit log2 --- #if (defined(__GNUC__) || defined(__clang__)) && (defined(__x86_64__) || defined(__i386__)) inline uint32_t log2(uint32_t v) { uint32_t result; __asm__("bsrl %1, %0" : "=r"(result) : "r"(v)); return result; } #elif (defined(__GNUC__) || defined(__clang__)) inline uint32_t log2(uint32_t v) { return static_cast(31 - __builtin_clz(v)); } #elif defined(_WIN32) inline uint32_t log2(uint32_t v) { unsigned long index; _BitScanReverse(&index, v); return static_cast(index); } #else inline uint32_t log2(uint32_t v) { return log2const(v); } #endif // 32-bit } // namespace detail } // namespace dispenso ================================================ FILE: dispenso/detail/notifier_common.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once // For fallback path #include #include #if defined(__linux__) #include #include #include #include namespace dispenso { namespace detail { static int futex( int* uaddr, int futex_op, int val, const struct timespec* timeout, int* /*uaddr2*/, int val3) { return static_cast(syscall(SYS_futex, uaddr, futex_op, val, timeout, uaddr, val3)); } } // namespace detail } // namespace dispenso #elif defined(__MACH__) #include #include #include // Detect os_sync_wait_on_address availability (macOS 14.4+) #if defined(__has_include) #if __has_include() #if defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED >= 140400 #define DISPENSO_HAS_OS_SYNC 1 #include #endif #endif #endif #ifndef DISPENSO_HAS_OS_SYNC // __ulock APIs are available since macOS 10.12 (Sierra). On older versions (e.g. PPC/10.5 builds // for MacPorts), fall through to the std::mutex fallback. 
#if !defined(__MAC_OS_X_VERSION_MIN_REQUIRED) || __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200 #define DISPENSO_HAS_ULOCK 1 extern "C" int __ulock_wait(uint32_t operation, void* addr, uint64_t value, uint32_t timeout_us); extern "C" int __ulock_wake(uint32_t operation, void* addr, uint64_t value); #ifndef UL_COMPARE_AND_WAIT #define UL_COMPARE_AND_WAIT 1 #endif #ifndef ULF_WAKE_ALL #define ULF_WAKE_ALL 0x00000100 #endif #endif // macOS >= 10.12 #endif // DISPENSO_HAS_OS_SYNC // DISPENSO_HAS_MAC_FUTEX is set when any mac futex-like API is available. #if defined(DISPENSO_HAS_OS_SYNC) || defined(DISPENSO_HAS_ULOCK) #define DISPENSO_HAS_MAC_FUTEX 1 #endif #ifdef DISPENSO_HAS_MAC_FUTEX namespace dispenso { namespace detail { inline void mac_futex_wait(void* addr, uint64_t expected, size_t size) { #ifdef DISPENSO_HAS_OS_SYNC os_sync_wait_on_address(addr, expected, size, OS_SYNC_WAIT_ON_ADDRESS_NONE); #else (void)size; __ulock_wait(UL_COMPARE_AND_WAIT, addr, expected, 0); #endif } inline int mac_futex_wait_for(void* addr, uint64_t expected, size_t size, uint64_t relTimeUs) { #ifdef DISPENSO_HAS_OS_SYNC static mach_timebase_info_data_t sTimebaseInfo = []() { mach_timebase_info_data_t i; mach_timebase_info(&i); return i; }(); uint64_t ns = relTimeUs * 1000; uint64_t timeout = ns * sTimebaseInfo.denom / sTimebaseInfo.numer; return os_sync_wait_on_address_with_timeout( addr, expected, size, OS_SYNC_WAIT_ON_ADDRESS_NONE, OS_CLOCK_MACH_ABSOLUTE_TIME, timeout); #else (void)size; // __ulock_wait takes a uint32_t timeout in microseconds, which wraps at ~4295 seconds (~72 min). // Timeouts beyond that cause a spurious early wake, which is harmless since callers re-check // status in a loop. return __ulock_wait(UL_COMPARE_AND_WAIT, addr, expected, static_cast(relTimeUs)); #endif } inline void mac_futex_wake_one(void* addr, size_t size) { #ifdef DISPENSO_HAS_OS_SYNC os_sync_wake_by_address_any(addr, size, OS_SYNC_WAKE_BY_ADDRESS_NONE); #else (void)size; __ulock_wake(UL_COMPARE_AND_WAIT, addr, 0); #endif } inline void mac_futex_wake_all(void* addr, size_t size) { #ifdef DISPENSO_HAS_OS_SYNC os_sync_wake_by_address_all(addr, size, OS_SYNC_WAKE_BY_ADDRESS_NONE); #else (void)size; __ulock_wake(UL_COMPARE_AND_WAIT | ULF_WAKE_ALL, addr, 0); #endif } } // namespace detail } // namespace dispenso #endif // DISPENSO_HAS_MAC_FUTEX #elif defined(_WIN32) #if (defined(_M_ARM64) || defined(__aarch64__)) && !defined(_ARM64_) #define _ARM64_ #elif (defined(_M_ARM) || defined(__arm__)) && !defined(_ARM_) #define _ARM_ #elif (defined(_M_AMD64) || defined(__x86_64__) || defined(_WIN64)) && !defined(_AMD64_) #define _AMD64_ #elif !defined(_X86_) #define _X86_ #endif // platform #include #include namespace dispenso { namespace detail { constexpr int kErrorTimeoutWin = 0x000005B4; constexpr unsigned long kInfiniteWin = static_cast(-1); } // namespace detail } // namespace dispenso #endif // PLATFORM ================================================ FILE: dispenso/detail/once_callable_impl.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
*/ #include #include #include namespace dispenso { namespace detail { class OnceCallable { public: virtual void run() = 0; virtual void destroyOnly() = 0; virtual ~OnceCallable() = default; }; struct OnceCallableData { void* data; void (*invoke)(void*, bool run); }; template void invokeImpl(void* ptr, bool run) { F* f = static_cast(ptr); if (DISPENSO_EXPECT(run, true)) { (*f)(); } f->~F(); deallocSmallBuffer(ptr); } template inline OnceCallableData createOnceCallable(F&& f) { using FNoRef = typename std::remove_reference::type; constexpr size_t kAllocSize = static_cast(nextPow2(std::max(sizeof(FNoRef), alignof(FNoRef)))); void* buf = allocSmallBuffer(); new (buf) FNoRef(std::forward(f)); return {buf, &invokeImpl}; } inline void runOnceCallable(void* ptr, bool run) { auto* callable = static_cast(ptr); if (DISPENSO_EXPECT(run, true)) { callable->run(); } else { callable->destroyOnly(); } } } // namespace detail } // namespace dispenso ================================================ FILE: dispenso/detail/op_result.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #include #include namespace dispenso { namespace detail { template class OpResult { public: OpResult() : ptr_(nullptr) {} template < typename U, typename = typename std::enable_if< !std::is_same::type, OpResult>::value>::type> OpResult(U&& u) : ptr_(new (buf_) T(std::forward(u))) {} OpResult(const OpResult& oth) : ptr_(oth ? new (buf_) T(*oth.ptr_) : nullptr) {} OpResult(OpResult&& oth) : ptr_(oth ? new (buf_) T(std::move(*oth.ptr_)) : nullptr) { oth.ptr_ = nullptr; } OpResult& operator=(const OpResult& oth) { if (&oth == this) { return *this; } if (ptr_) { ptr_->~T(); } if (oth) { ptr_ = new (buf_) T(*oth.ptr_); } else { ptr_ = nullptr; } return *this; } OpResult& operator=(OpResult&& oth) { if (&oth == this) { return *this; } if (ptr_) { ptr_->~T(); } if (oth) { ptr_ = new (buf_) T(std::move(*oth.ptr_)); oth.ptr_ = nullptr; } else { ptr_ = nullptr; } return *this; } ~OpResult() { if (ptr_) { ptr_->~T(); } } template T& emplace(Args&&... args) { if (ptr_) { ptr_->~T(); } ptr_ = new (buf_) T(std::forward(args)...); return *ptr_; } operator bool() const { return ptr_; } bool has_value() const { return ptr_; } T& value() { return *ptr_; } private: alignas(T) char buf_[sizeof(T)]; T* ptr_; }; } // namespace detail } // namespace dispenso ================================================ FILE: dispenso/detail/per_thread_info.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include namespace dispenso { namespace detail { namespace { DISPENSO_THREAD_LOCAL PerThreadInfo g_perThreadInfo; } PerThreadInfo& PerPoolPerThreadInfo::info() { return g_perThreadInfo; } } // namespace detail } // namespace dispenso ================================================ FILE: dispenso/detail/per_thread_info.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
*/ #pragma once #include namespace dispenso { namespace detail { struct alignas(kCacheLineSize) PerThreadInfo { void* pool = nullptr; void* producer = nullptr; int parForRecursionLevel = 0; }; class ParForRecursion { public: ~ParForRecursion() { --parForRecursionLevel_; } private: ParForRecursion(int& parForRecursionLevel) : parForRecursionLevel_(parForRecursionLevel) { ++parForRecursionLevel_; } int& parForRecursionLevel_; friend class PerPoolPerThreadInfo; }; class PerPoolPerThreadInfo { public: static void registerPool(void* pool, void* producer) { auto& i = info(); i.pool = pool; i.producer = producer; } static void* producer(void* pool) { auto& i = info(); return i.pool == pool ? i.producer : nullptr; } static bool isParForRecursive(void* pool) { auto& i = info(); return (!i.pool || i.pool == pool) && i.parForRecursionLevel > 0; } static bool isPoolRecursive(void* pool) { return info().pool == pool; } static ParForRecursion parForRecurse() { return ParForRecursion(info().parForRecursionLevel); } private: DISPENSO_DLL_ACCESS static PerThreadInfo& info(); }; } // namespace detail } // namespace dispenso ================================================ FILE: dispenso/detail/pipeline_impl.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #if __cplusplus >= 201703L #include #endif // C++17 #include #include #include #include #include namespace dispenso { namespace detail { // Maximum depth for inline pipeline continuation before forcing a schedule through // the thread pool. Prevents unbounded stack growth when the pool inlines execution. static constexpr int kMaxPipelineInlineDepth = 32; class LimitGatedScheduler { public: LimitGatedScheduler(ConcurrentTaskSet& tasks, ssize_t res) : impl_(new (alignedMalloc(sizeof(Impl), alignof(Impl))) Impl(tasks, res)) {} template void schedule(F&& fPipe) { impl_->schedule(std::forward(fPipe)); } void wait() { impl_->wait(); } private: // Put the guts within a unique_ptr to enable this type to be movable. class Impl { public: Impl(ConcurrentTaskSet& tasks, ssize_t res) : tasks_(tasks), resources_(res), unlimited_(res == std::numeric_limits::max()), serial_(res == 1) {} template void schedule(F&& fPipe) { outstanding_.fetch_add(1, std::memory_order_acq_rel); // RAII guard ensures outstanding_ is decremented even if an exception propagates. // Without this, wait() would hang spinning on outstanding_ reaching zero. struct OutstandingGuard { DISPENSO_INLINE ~OutstandingGuard() { outstanding_.fetch_sub(1, std::memory_order_acq_rel); } std::atomic& outstanding_; }; if (unlimited_) { tasks_.schedule([this, fPipe = std::move(fPipe)]() mutable { OutstandingGuard oGuard{outstanding_}; if (!tasks_.hasException()) { fPipe([]() {}); } }); return; } DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); queue_.enqueue([this, fPipe = std::move(fPipe)]() mutable { OutstandingGuard oGuard{outstanding_}; // RAII guard releases the resource slot if the completion callback is never // called (i.e. if the user's stage function throws before reaching it). // Disarmed on the normal path when the completion callback fires. 
struct ResourceGuard { DISPENSO_INLINE ~ResourceGuard() { if (armed_) { resources_.fetch_add(1, std::memory_order_acq_rel); } } DISPENSO_INLINE void disarm() { armed_ = false; } std::atomic& resources_; bool armed_{true}; }; ResourceGuard rGuard{resources_}; #if defined(__cpp_exceptions) try { #endif fPipe([this, &rGuard]() { rGuard.disarm(); OnceFunction func; DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); bool deqd = queue_.try_dequeue(func); DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); if (deqd) { // For serial stages (limit=1), execute continuation inline to reduce // thread pool scheduling overhead. This is safe because we're already // on a worker thread and only one item can be in the stage at a time. // Depth-limit to prevent unbounded stack growth when the pool inlines. if (serial_) { static DISPENSO_THREAD_LOCAL int depth = 0; if (depth < kMaxPipelineInlineDepth) { struct DepthGuard { DISPENSO_INLINE DepthGuard(int& d) : d_(d) { ++d_; } DISPENSO_INLINE ~DepthGuard() { --d_; } int& d_; }; DepthGuard dGuard(depth); func(); } else { tasks_.schedule(std::move(func), ForceQueuingTag()); } } else { tasks_.schedule(std::move(func)); } } else { resources_.fetch_add(1, std::memory_order_acq_rel); } }); #if defined(__cpp_exceptions) } catch (...) { tasks_.trySetCurrentException(); } #endif }); DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); while (resources_.fetch_sub(1, std::memory_order_acq_rel) > 0) { OnceFunction func; DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); bool deqd = queue_.try_dequeue(func); DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); if (deqd) { tasks_.schedule(std::move(func)); } else { break; } } resources_.fetch_add(1, std::memory_order_acq_rel); } void wait() { if (!unlimited_) { // LIMITED path: drain items from the local queue into the // ConcurrentTaskSet, and spin until all outstanding items for this // stage have completed. We must spin on outstanding_ because in-flight // CTS tasks from the previous stage may call pipeNext_.execute() after // this drain starts, enqueuing new items into our local queue. Without // the outstanding_ check, those late-arriving items could be orphaned // if schedule()'s try_dequeue spuriously misses them. while (outstanding_.load(std::memory_order_acquire)) { if (tasks_.hasException()) { // Drain remaining queued items without executing them. OnceFunction discard; while (queue_.try_dequeue(discard)) { outstanding_.fetch_sub(1, std::memory_order_acq_rel); discard.cleanupNotRun(); } break; } OnceFunction func; DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); bool deqd = queue_.try_dequeue(func); DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); if (deqd) { // Spin until a resource slot is available. Check for exceptions // each iteration to avoid deadlocking when all pool threads have // finished and no one will release a resource. while (resources_.fetch_sub(1, std::memory_order_acq_rel) <= 0) { resources_.fetch_add(1, std::memory_order_acq_rel); if (tasks_.hasException()) { outstanding_.fetch_sub(1, std::memory_order_acq_rel); func.cleanupNotRun(); goto next_item; } if (!tasks_.tryExecuteNext()) { std::this_thread::yield(); } } tasks_.schedule(std::move(func)); next_item:; } else if (!tasks_.tryExecuteNext()) { std::this_thread::yield(); } } return; } // Unified drain + wait loop. 
We must keep checking the local queue // because items can arrive after an earlier drain pass — this happens // when a completion callback's try_dequeue races with a concurrent // enqueue: the callback sees an empty queue and releases the resource // instead of chaining, leaving the item orphaned in the queue. // Re-checking on every iteration ensures we eventually dispatch it. while (outstanding_.load(std::memory_order_acquire)) { // For the unlimited path, items are scheduled directly to CTS (not // queued locally). When an exception occurs, remaining items are // already in CTS's pool queue wrapped by packageTask — CTS::wait() // will drain them. Break out here to avoid spinning. if (tasks_.hasException()) { break; } OnceFunction func; DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); bool deqd = queue_.try_dequeue(func); DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); if (deqd) { // Wait for resource to become available while (resources_.fetch_sub(1, std::memory_order_acq_rel) <= 0) { resources_.fetch_add(1, std::memory_order_acq_rel); if (!tasks_.tryExecuteNext()) { std::this_thread::yield(); } } tasks_.schedule(std::move(func)); } else if (!tasks_.tryExecuteNext()) { std::this_thread::yield(); } } } private: ConcurrentTaskSet& tasks_; alignas(kCacheLineSize) std::atomic resources_; alignas(kCacheLineSize) std::atomic outstanding_{0}; moodycamel::ConcurrentQueue queue_; // Note: In benchmarks, this doesn't seem to help very much (~1%), but using it should lower // resource requirements because the queue_ never needs to instantiate memory. const bool unlimited_; const bool serial_; }; struct Deleter { void operator()(Impl* r) { r->~Impl(); alignedFree(r); } }; std::unique_ptr impl_; }; template struct Stage { Stage(F&& fIn, ssize_t limitIn) : f(std::move(fIn)), limit(limitIn) {} template auto operator()(T&& t) { return f(std::forward(t)); } auto operator()() { return f(); } F f; ssize_t limit; }; template struct StageLimits { constexpr static ssize_t limit(const T& /*t*/) { return 1; } }; template struct StageLimits> { static ssize_t limit(const Stage& t) { return std::max(ssize_t{1}, t.limit); } }; enum class StageClass { kSingleStage, kGenerator, kOpTransform, kTransform, kSink }; template struct TransformTraits { static constexpr StageClass kStageClass = StageClass::kTransform; }; #if __cplusplus >= 201703L template struct TransformTraits> { static constexpr StageClass kStageClass = StageClass::kOpTransform; }; #endif // C++17 template struct TransformTraits> { static constexpr StageClass kStageClass = StageClass::kOpTransform; }; template struct OptionalStrippedTraits { using Type = T; }; #if __cplusplus >= 201703L template struct OptionalStrippedTraits> { using Type = T; }; #endif // C++17 template struct OptionalStrippedTraits> { using Type = T; }; struct SinkPipe {}; template class Pipe; template class TransformPipe { public: template TransformPipe(ConcurrentTaskSet& tasks, StageIn&& s, PipeNext&& n) : stage_(std::forward(s)), tasks_(tasks, StageLimits::limit(stage_)), pipeNext_(std::move(n)) {} void wait() { tasks_.wait(); pipeNext_.wait(); } protected: CurStage stage_; LimitGatedScheduler tasks_; PipeNext pipeNext_; }; template class Pipe : public TransformPipe { public: template Pipe(ConcurrentTaskSet& tasks, StageIn&& s, PipeNext&& n) : TransformPipe(tasks, std::forward(s), std::move(n)) {} template void execute(Input&& input) { this->tasks_.schedule([input = std::move(input), this](auto&& stageCompleteFunc) mutable { auto&& res = this->stage_(std::move(input)); stageCompleteFunc(); 
this->pipeNext_.execute(res); }); } }; template class Pipe : public TransformPipe { public: template Pipe(ConcurrentTaskSet& tasks, StageIn&& s, PipeNext&& n) : TransformPipe(tasks, std::forward(s), std::move(n)) {} template void execute(Input&& input) { this->tasks_.schedule([input = std::move(input), this](auto&& stageCompleteFunc) mutable { auto op = this->stage_(std::move(input)); stageCompleteFunc(); if (op) { this->pipeNext_.execute(std::move(op.value())); } }); } }; template class Pipe { public: template Pipe(ConcurrentTaskSet& tasks, StageIn&& s, PipeNext&& n) : tasks_(tasks), stage_(std::forward(s)), pipeNext_(std::move(n)) {} void execute() { ssize_t numThreads = std::max( 1, std::min(tasks_.numPoolThreads(), StageLimits::limit(stage_))); completion_ = std::make_unique(static_cast(numThreads)); for (ssize_t i = 0; i < numThreads; ++i) { tasks_.schedule([this]() { // RAII guard ensures the completion event is signaled even if an exception // propagates out of pipeNext_.execute() (e.g. when ConcurrentTaskSet runs a // downstream stage inline and it throws). Without this, wait() would hang on // completion_->wait(0) because the count is never decremented. struct CompletionGuard { DISPENSO_INLINE ~CompletionGuard() { if (completion->intrusiveStatus().fetch_sub(1, std::memory_order_acq_rel) == 1) { completion->notify(0); } } CompletionEventImpl* completion; }; CompletionGuard cGuard{completion_.get()}; while (!tasks_.hasException()) { auto op = stage_(); if (!op) { break; } pipeNext_.execute(std::move(op.value())); } }); } } void wait() { completion_->wait(0); pipeNext_.wait(); tasks_.wait(); } private: ConcurrentTaskSet& tasks_; std::unique_ptr completion_; CurStage stage_; PipeNext pipeNext_; }; template class Pipe { public: template Pipe(ConcurrentTaskSet& tasks, StageIn&& s) : tasks_(tasks), stage_(std::forward(s)) {} void execute() { size_t numThreads = std::min(tasks_.numPoolThreads(), StageLimits::limit(stage_)); for (size_t i = 0; i < numThreads; ++i) { tasks_.schedule([this]() { while (!tasks_.hasException() && stage_()) { } }); } } void wait() { tasks_.wait(); } private: ConcurrentTaskSet& tasks_; CurStage stage_; }; template class Pipe { public: template Pipe(ConcurrentTaskSet& tasks, StageIn&& s) : stage_(std::forward(s)), tasks_(tasks, StageLimits::limit(stage_)) {} template void execute(Input&& input) { tasks_.schedule([input = std::move(input), this](auto&& stageCompleteFunc) mutable { stage_(std::move(input)); stageCompleteFunc(); }); } void wait() { tasks_.wait(); } private: CurStage stage_; LimitGatedScheduler tasks_; }; template auto makePipesHelper(ConcurrentTaskSet& tasks, Stage0&& sCur) { return Pipe(tasks, std::forward(sCur)); } template auto makePipesHelper( ConcurrentTaskSet& tasks, Stage0&& sCur, Stage1&& sNext, Stages&&... sFollowing) { using Stage0Result = ResultOf::Type>; auto pipe = makePipesHelper( tasks, std::forward(sNext), std::forward(sFollowing)...); constexpr StageClass kSc = TransformTraits::kStageClass; return Pipe(tasks, std::forward(sCur), std::move(pipe)); } template auto makePipes(ConcurrentTaskSet& tasks, Stage0&& sCur, Stage1&& sNext, Stages&&... 
sFollowing) { auto pipe = makePipesHelper>( tasks, std::forward(sNext), std::forward(sFollowing)...); return Pipe( tasks, std::forward(sCur), std::move(pipe)); } template auto makePipes(ConcurrentTaskSet& tasks, Stage0&& sCur) { return Pipe(tasks, std::forward(sCur)); } } // namespace detail } // namespace dispenso ================================================ FILE: dispenso/detail/quanta.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #ifdef _WIN32 #include #include #endif #include namespace dispenso { #ifdef _WIN32 namespace { struct OsQuantaSetter { OsQuantaSetter() { timeBeginPeriod(1); } ~OsQuantaSetter() { timeEndPeriod(1); } }; } // namespace #else namespace { struct OsQuantaSetter {}; } // namespace #endif // _WIN32 namespace detail { void registerFineSchedulerQuanta() { static OsQuantaSetter setter; (void)setter; } } // namespace detail } // namespace dispenso ================================================ FILE: dispenso/detail/quanta.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once namespace dispenso { namespace detail { void registerFineSchedulerQuanta(); } // namespace detail } // namespace dispenso ================================================ FILE: dispenso/detail/result_of.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #include namespace dispenso { namespace detail { #if defined(__cpp_lib_is_invocable) && __cpp_lib_is_invocable >= 201703L template using ResultOf = typename std::invoke_result_t, std::decay_t...>; #else template using ResultOf = typename std::result_of::type(typename std::decay::type...)>::type; #endif // c++17 } // namespace detail } // namespace dispenso ================================================ FILE: dispenso/detail/rw_lock_impl.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include namespace dispenso { namespace detail { class RWLockImpl { public: /** * Locks for write access * * @note It is undefined behavior to recursively lock **/ void lock(); /** * Tries to lock for write access, returns if unable to lock * * @return true if lock was acquired, false otherwise **/ bool try_lock(); /** * Unlocks write access * * @note Must already be locked by the current thread of execution, otherwise, the behavior is * undefined. **/ void unlock(); /** * Locks for read access * * @note It is undefined behavior to recursively lock **/ void lock_shared(); /** * Tries to lock for read access, returns if unable to lock * * @return true if lock was acquired, false otherwise * * @note It is undefined behavior to recursively lock **/ bool try_lock_shared(); /** * Unlocks read access * * @note Must already be locked by the current thread of execution, otherwise, the behavior is * undefined. **/ void unlock_shared(); /** * Upgrade from a reader lock to a writer lock. 
lock_upgrade is a power-user interface. There is * a very good reason why it is not exposed as upgrade_mutex in the standard. To use it safely, * you *MUST* ensure only one thread can try to lock for write concurrently. If that cannot be * guaranteed, you should unlock for read, and lock for write instead of using lock_upgrade to * avoid potential deadlock. * * @note Calling this if the writer lock is already held, or if no reader lock is already held is * undefined behavior. **/ void lock_upgrade(); /** * Downgrade the lock from a writer lock to a reader lock. * * @note Calling this if the writer lock is not held results in undefined behavior **/ void lock_downgrade(); private: static constexpr uint32_t kWriteBit = 0x80000000; static constexpr uint32_t kReaderBits = 0x7fffffff; std::atomic lock_{0}; }; inline void RWLockImpl::lock() { uint32_t val = lock_.fetch_or(kWriteBit, std::memory_order_acq_rel); while (val & kWriteBit) { val = lock_.fetch_or(kWriteBit, std::memory_order_acq_rel); } // We've claimed single write ownership now. We need to drain off readers while (val != kWriteBit) { val = lock_.load(std::memory_order_acquire); } } inline bool RWLockImpl::try_lock() { uint32_t val = lock_.fetch_or(kWriteBit, std::memory_order_acq_rel); return !(val & kWriteBit); } inline void RWLockImpl::unlock() { lock_.fetch_and(kReaderBits, std::memory_order_acq_rel); } inline void RWLockImpl::lock_shared() { uint32_t val = lock_.fetch_add(1, std::memory_order_acq_rel); while (val & kWriteBit) { val = lock_.fetch_sub(1, std::memory_order_acq_rel); while (val & kWriteBit) { val = lock_.load(std::memory_order_acquire); } val = lock_.fetch_add(1, std::memory_order_acq_rel); } } inline bool RWLockImpl::try_lock_shared() { uint32_t val = lock_.fetch_add(1, std::memory_order_acq_rel); if (val & kWriteBit) { lock_.fetch_sub(1, std::memory_order_acq_rel); return false; } return true; } inline void RWLockImpl::unlock_shared() { lock_.fetch_sub(1, std::memory_order_acq_rel); } inline void RWLockImpl::lock_upgrade() { uint32_t val = lock_.fetch_or(kWriteBit, std::memory_order_acq_rel); while (val & kWriteBit) { val = lock_.fetch_or(kWriteBit, std::memory_order_acq_rel); } // We've claimed single write ownership now. We need to drain off readers, including ourself lock_.fetch_sub(1, std::memory_order_acq_rel); while (val != kWriteBit) { val = lock_.load(std::memory_order_acquire); } } inline void RWLockImpl::lock_downgrade() { // Get reader ownership first lock_.fetch_add(1, std::memory_order_acq_rel); unlock(); } } // namespace detail } // namespace dispenso ================================================ FILE: dispenso/detail/small_buffer_allocator_impl.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #include #include #include #include #include namespace dispenso { namespace detail { struct SmallBufferGlobals { moodycamel::ConcurrentQueue centralStore; std::vector backingStore; std::atomic backingStoreLock{0}; ~SmallBufferGlobals() { for (char* b : backingStore) { alignedFree(b); } } }; template DISPENSO_DLL_ACCESS SmallBufferGlobals& getSmallBufferGlobals(); /** * A class for allocating small chunks of memory quickly. The class is built on concepts of * thread-local pools of buffers of specific sizes. 
It is best to limit the distinct number of * sizes of chunk sizes used in one program or there is a good chance that there may be a lot of * unused memory allocated in the system. * * SmallBufferAllocator is completely thread-safe. **/ template class SmallBufferAllocator { private: static constexpr size_t kLogFactor = log2const(kChunkSize | 1); // TODO(T88183021): Make these factors compile-time configurable. For example, the current values // can lead to megabytes of data being allocated, even if the allocator is only used for one or // two allocations. Likely we can reduce these sizes by a decent factor without affecting // benchmarks, and then reduce them even further as an option. static constexpr size_t kMallocBytes = (1 << 12) * kLogFactor; static constexpr size_t kIdealTLCacheBytes = kMallocBytes / 4; static constexpr size_t kIdealNumTLBuffers = kIdealTLCacheBytes / kChunkSize; static constexpr size_t kMaxNumTLBuffers = 2 * kIdealNumTLBuffers; static constexpr size_t kBuffersPerMalloc = kMallocBytes / kChunkSize; static_assert(kIdealNumTLBuffers > 0, "Must have a positive number of buffers to work with"); public: /** * Allocate a buffer of kChunkSize bytes. * * @return a pointer to the buffer. **/ static char* alloc() { auto bnc = buffersAndCount(); char** tlBuffers = std::get<0>(bnc); size_t& tlCount = std::get<1>(bnc); if (!tlCount) { // We only need to register (at least) once when we grab from the central store; without going // to the central store at least once (or calling dealloc first), we cannot have buffers to // return. registerCleanup(); tlCount = grabFromCentralStore(tlBuffers); } return tlBuffers[--tlCount]; } /** * Deallocate a buffer previously allocated via alloc * * @param buffer The buffer to deallocate. **/ static void dealloc(char* buffer) { // We need to register at least once for any call to dealloc, because we may dealloc on this // thread without having allocated on the same thread, and if we don't register, any memory in // the thread-local buffers will not be returned to the central store on thread destruction. auto bnc = buffersAndCount(); char** tlBuffers = std::get<0>(bnc); size_t& tlCount = std::get<1>(bnc); registerCleanup(); tlBuffers[tlCount++] = buffer; if (tlCount == kMaxNumTLBuffers) { recycleToCentralStore(tlBuffers + kIdealNumTLBuffers, kIdealNumTLBuffers); tlCount -= kIdealNumTLBuffers; } } /** * Get the approximate number of underlying bytes allocated by the allocator. This is mostly for * testing and debugging. * * @return The approximate number of bytes currently allocated. 
**/ static size_t bytesAllocated() { uint32_t allocId = 0; auto& globals = getSmallBufferGlobals(); auto& lock = globals.backingStoreLock; while (!lock.compare_exchange_weak(allocId, 1, std::memory_order_acquire)) { } size_t bytes = kMallocBytes * globals.backingStore.size(); lock.store(0, std::memory_order_release); return bytes; } private: struct PerThreadQueuingData { PerThreadQueuingData( moodycamel::ConcurrentQueue& cstore, std::tuple buffersAndCount) : cstore_(cstore), buffers_(std::get<0>(buffersAndCount)), count_(std::get<1>(buffersAndCount)) { DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); new (ptokenBuf_) moodycamel::ProducerToken(cstore); new (ctokenBuf_) moodycamel::ConsumerToken(cstore); DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); } ~PerThreadQueuingData(); void enqueue_bulk(char** buffers, size_t count) { DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); cstore_.enqueue_bulk(ptoken(), buffers, count); DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); } size_t try_dequeue_bulk(char** buffers, size_t count) { DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); size_t actual = cstore_.try_dequeue_bulk(ctoken(), buffers, count); DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); return actual; } private: moodycamel::ProducerToken& ptoken() { return *reinterpret_cast(ptokenBuf_); } moodycamel::ConsumerToken& ctoken() { return *reinterpret_cast(ctokenBuf_); } moodycamel::ConcurrentQueue& cstore_; alignas(moodycamel::ProducerToken) char ptokenBuf_[sizeof(moodycamel::ProducerToken)]; alignas(moodycamel::ConsumerToken) char ctokenBuf_[sizeof(moodycamel::ConsumerToken)]; char** buffers_; size_t& count_; }; static void registerCleanup() { // Note that this would be better/cheaper as a static thread_local member; however, there are // currently bugs in multiple compilers that prevent the destructor from being called properly // in that context. A workaround that appears to work more portably is to put this here, and // ensure registerCleanup is called for any alloc or dealloc, even though this may have a small // runtime cost. (void)getThreadQueuingData(); } static size_t grabFromCentralStore(char** buffers) { auto& queue = getThreadQueuingData(); auto& globals = getSmallBufferGlobals(); auto& lock = globals.backingStoreLock; auto& backingStore = globals.backingStore; while (true) { size_t grabbed = queue.try_dequeue_bulk(buffers, kIdealNumTLBuffers); if (grabbed) { return grabbed; } uint32_t allocId = lock.fetch_add(1, std::memory_order_acquire); if (allocId == 0) { char* buffer = reinterpret_cast(detail::alignedMalloc(kMallocBytes, kChunkSize)); backingStore.push_back(buffer); constexpr size_t kNumToPush = kBuffersPerMalloc - kIdealNumTLBuffers; char* topush[kNumToPush]; for (size_t i = 0; i < kNumToPush; ++i, buffer += kChunkSize) { topush[i] = buffer; } queue.enqueue_bulk(topush, kNumToPush); lock.store(0, std::memory_order_release); for (size_t i = 0; i < kIdealNumTLBuffers; ++i, buffer += kChunkSize) { buffers[i] = buffer; } return kIdealNumTLBuffers; } else { while (lock.load(std::memory_order_relaxed)) { std::this_thread::yield(); } } } } static void recycleToCentralStore(char** buffers, size_t numToRecycle) { getThreadQueuingData().enqueue_bulk(buffers, numToRecycle); // TODO(bbudge): consider whether we need to do any garbage collection and return memory to // the system. 
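    // Note: dealloc() calls this with the upper half of the thread-local cache
    // (kIdealNumTLBuffers buffers) once the cache reaches kMaxNumTLBuffers, halving
    // the cache while leaving kIdealNumTLBuffers buffers for reuse on this thread.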
} private: DISPENSO_DLL_ACCESS static std::tuple buffersAndCount() { static DISPENSO_THREAD_LOCAL char* tlBuffers[kMaxNumTLBuffers]; static DISPENSO_THREAD_LOCAL size_t tlCount = 0; return {tlBuffers, tlCount}; }; DISPENSO_DLL_ACCESS static PerThreadQueuingData& getThreadQueuingData() { static thread_local PerThreadQueuingData data( getSmallBufferGlobals().centralStore, buffersAndCount()); return data; } }; } // namespace detail } // namespace dispenso ================================================ FILE: dispenso/detail/task_set_impl.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file task_set.h * A file providing TaskSet and ConcurrentTaskSet. These interfaces allow the user to * submit/schedule multiple closures and then wait on them. **/ #pragma once #include namespace dispenso { class TaskSetBase; namespace detail { template class FutureBase; class LimitGatedScheduler; DISPENSO_DLL_ACCESS void pushThreadTaskSet(TaskSetBase* tasks); DISPENSO_DLL_ACCESS void popThreadTaskSet(); } // namespace detail DISPENSO_DLL_ACCESS TaskSetBase* parentTaskSet(); class TaskSetBase { public: TaskSetBase( ThreadPool& p, ParentCascadeCancel registerForParentCancel = ParentCascadeCancel::kOff, ssize_t stealingLoadMultiplier = 4) : pool_(p), taskSetLoadFactor_(stealingLoadMultiplier * p.numThreads()) { #if defined DISPENSO_DEBUG assert(stealingLoadMultiplier > 0); pool_.outstandingTaskSets_.fetch_add(1, std::memory_order_acquire); #endif parent_ = (registerForParentCancel == ParentCascadeCancel::kOn) ? parentTaskSet() : nullptr; if (parent_) { parent_->registerChild(this); if (parent_->canceled()) { canceled_.store(true, std::memory_order_release); } } } TaskSetBase(TaskSetBase&& other) = delete; TaskSetBase& operator=(TaskSetBase&& other) = delete; ssize_t numPoolThreads() const { return pool_.numThreads(); } ThreadPool& pool() { return pool_; } void cancel() { canceled_.store(true, std::memory_order_release); cancelChildren(); } bool canceled() const { return canceled_.load(std::memory_order_acquire); } /** * Check whether an exception has been captured by this task set. * When exceptions are disabled at compile time, this always returns false, * allowing the compiler to eliminate exception-related branches entirely. **/ #if defined(__cpp_exceptions) bool hasException() const { return guardException_.load(std::memory_order_acquire) != kUnset; } #else constexpr bool hasException() const { return false; } #endif ~TaskSetBase() { #if defined DISPENSO_DEBUG pool_.outstandingTaskSets_.fetch_sub(1, std::memory_order_release); #endif if (parent_) { parent_->unregisterChild(this); } } protected: template auto packageTask(F&& f) { outstandingTaskCount_.fetch_add(1, std::memory_order_acquire); return [this, f = std::move(f)]() mutable { // Skip push/pop if this TaskSet is already the current parent on this // thread. This happens with ConcurrentTaskSet self-recursion (scheduling // tasks from within tasks on the same set), which is normal and expected. // Only actual nesting of *different* TaskSets needs stack tracking. bool pushed = (parentTaskSet() != this); if (pushed) { detail::pushThreadTaskSet(this); } if (!canceled_.load(std::memory_order_acquire)) { #if defined(__cpp_exceptions) try { f(); } catch (...) 
{ trySetCurrentException(); } #else f(); #endif // __cpp_exceptions } if (pushed) { detail::popThreadTaskSet(); } outstandingTaskCount_.fetch_sub(1, std::memory_order_release); }; } // Package a task without incrementing the counter (for bulk scheduling where // the counter is incremented once for the entire batch). template auto packageTaskNoIncrement(F&& f) { return [this, f = std::move(f)]() mutable { detail::pushThreadTaskSet(this); if (!canceled_.load(std::memory_order_acquire)) { #if defined(__cpp_exceptions) try { f(); } catch (...) { trySetCurrentException(); } #else f(); #endif // __cpp_exceptions } detail::popThreadTaskSet(); outstandingTaskCount_.fetch_sub(1, std::memory_order_release); }; } template void scheduleBulkImpl(size_t count, Generator&& gen, moodycamel::ProducerToken* token) { if (count == 0) { return; } ssize_t numPool = pool_.numThreads(); size_t chunkSize = static_cast(numPool) + static_cast(numPool) / 2; if (chunkSize < 1) { chunkSize = 1; } size_t i = 0; while (i < count) { if (canceled()) { break; } ssize_t outstanding = outstandingTaskCount_.load(std::memory_order_relaxed); ssize_t curWork = pool_.workRemaining_.load(std::memory_order_relaxed); // Mirror the two-tier inline decision from ThreadPool::schedule(): // 1. TaskSet-level: outstandingTaskCount_ exceeds our own load factor // 2. Pool recursive: tight quickLoadFactor (numThreads*1.5) when called from a pool // thread. Critical for nested patterns where inner scheduleBulk must throttle to // avoid flooding the pool queue and starving outer tasks. // 3. Pool global: loose poolLoadFactor_ (numThreads*32) for non-recursive callers if (outstanding > taskSetLoadFactor_ || (detail::PerPoolPerThreadInfo::isPoolRecursive(&pool_) && curWork > numPool + numPool / 2) || curWork > pool_.poolLoadFactor_.load(std::memory_order_relaxed)) { #if defined(__cpp_exceptions) try { gen(i)(); } catch (...) { trySetCurrentException(); } #else gen(i)(); #endif // __cpp_exceptions ++i; } else { ssize_t room = taskSetLoadFactor_ - outstanding; size_t toEnqueue = std::min({count - i, chunkSize, static_cast(room)}); if (toEnqueue == 0) { toEnqueue = 1; } outstandingTaskCount_.fetch_add(static_cast(toEnqueue), std::memory_order_acquire); size_t base = i; pool_.scheduleBulkEnqueue( toEnqueue, [this, &gen, base](size_t j) { return packageTaskNoIncrement(gen(base + j)); }, token); i += toEnqueue; } } } DISPENSO_DLL_ACCESS void trySetCurrentException(); bool testAndResetException(); void registerChild(TaskSetBase* child) { std::lock_guard lk(mtx_); child->prev_ = tail_; child->next_ = nullptr; if (tail_) { tail_->next_ = child; tail_ = child; } else { head_ = tail_ = child; } } void unregisterChild(TaskSetBase* child) { std::lock_guard lk(mtx_); if (child->prev_) { child->prev_->next_ = child->next_; } else { // We're head assert(child == head_); head_ = child->next_; } if (child->next_) { child->next_->prev_ = child->prev_; } else { // We're tail assert(child == tail_); tail_ = child->prev_; } } void cancelChildren() { std::lock_guard lk(mtx_); auto* node = head_; while (node) { node->cancel(); node = node->next_; } } alignas(kCacheLineSize) std::atomic outstandingTaskCount_{0}; alignas(kCacheLineSize) ThreadPool& pool_; alignas(kCacheLineSize) std::atomic canceled_{false}; const ssize_t taskSetLoadFactor_; // Always present to ensure stable ABI layout regardless of __cpp_exceptions. 
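  // Sketch of the intended protocol (inferred from the state names; the out-of-line
  // trySetCurrentException owns the real logic): the first task to observe an exception
  // claims kSetting via compare-exchange, writes exception_, then publishes kSet, so
  // hasException() readers only ever observe a fully-written exception_.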
enum ExceptionState { kUnset, kSetting, kSet }; std::atomic guardException_{kUnset}; std::exception_ptr exception_; TaskSetBase* parent_; // This mutex guards modifications/use of the intusive linked list between head_ and tail_ std::mutex mtx_; TaskSetBase* head_{nullptr}; TaskSetBase* tail_{nullptr}; // prev_ and next_ are links in our *parent's* intrusive linked list. TaskSetBase* prev_; TaskSetBase* next_; }; } // namespace dispenso ================================================ FILE: dispenso/detail/timed_task_impl.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include namespace dispenso { namespace detail { enum FunctionFlags : uint32_t { kFFlagsNone = 0, kFFlagsDetached = 1, kFFlagsCancelled = 2 }; struct TimedTaskImpl { alignas(kCacheLineSize) std::atomic count{0}; std::atomic timesToRun; std::atomic flags{kFFlagsNone}; std::atomic inProgress{0}; double nextAbsTime; double period; bool steady; std::function)> func; template TimedTaskImpl(size_t times, double next, double per, F&& f, Schedulable& sched, bool stdy) : timesToRun(times), nextAbsTime(next), period(per), steady(stdy) { func = [&sched, f = std::move(f), this](std::shared_ptr me) { if (flags.load(std::memory_order_acquire) & kFFlagsCancelled) { return; } inProgress.fetch_add(1, std::memory_order_acq_rel); auto wrap = [&f, this, me = std::move(me)]() mutable { if (!(flags.load(std::memory_order_acquire) & kFFlagsCancelled)) { if (!f()) { timesToRun.store(0, std::memory_order_release); flags.fetch_or(kFFlagsCancelled, std::memory_order_acq_rel); func = {}; } count.fetch_add(1, std::memory_order_acq_rel); } inProgress.fetch_sub(1, std::memory_order_release); me.reset(); }; sched.schedule(wrap, ForceQueuingTag()); }; } }; } // namespace detail } // namespace dispenso ================================================ FILE: dispenso/dispenso.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file dispenso.h * @brief Convenience header that includes all public dispenso headers. * * For finer-grained control over compile times, include individual headers instead. **/ #pragma once // Core threading primitives #include #include #include #include // Parallel algorithms #include #include #include // Graph-based task scheduling #include #include // Concurrent containers #include #include // Synchronization primitives #include #include #include // Memory management #include #include #include // Async utilities #include #include #include // Utilities #include #include #include #include #include // Sanitizer support #include ================================================ FILE: dispenso/fast_math/README.md ================================================ # dispenso::fast_math > **EXPERIMENTAL** — This sublibrary is under active development. The API is > unstable and subject to breaking changes without notice. Do not depend on it > in production code. > > fast_math is not built by default. To include it in the CMake build, set > `-DDISPENSO_BUILD_FAST_MATH=ON`. A fast, accurate, SIMD-friendly math library for `float` transcendentals. 
- [Design Goals](#design-goals)
- [Quick Start](#quick-start)
- [Accuracy Traits](#accuracy-traits)
- [SIMD Support](#simd-support)
- [Function Reference](#function-reference)
- [Performance](#performance)

## Design Goals

- **Fast**: 1.5-6x faster than glibc scalar math, with near-linear SIMD scaling
- **Accurate**: Drop-in replacements for `std::` math functions (1-4 ULP typical)
- **SIMD-native**: Same template works for `float`, `__m128`, `__m256`, `__m512`, `float32x4_t`, and Highway vectors — no source changes needed
- **Configurable**: `AccuracyTraits` template parameter trades speed for precision and edge-case handling at compile time

## Quick Start

```cpp
#include <dispenso/fast_math/fast_math.h>

namespace dfm = dispenso::fast_math;

// Scalar — drop-in replacement for std::sinf
float y = dfm::sin(x);

// SSE4.1 (4-wide) — same function, SIMD type
__m128 y4 = dfm::sin(x4);

// AVX2 (8-wide)
__m256 y8 = dfm::sin(x8);

// Max accuracy with bounds checking
float ymax = dfm::sin<dfm::MaxAccuracyTraits>(x);
```

For auto-detected best SIMD width:

```cpp
#include <dispenso/fast_math/simd.h>

using F = dispenso::fast_math::DefaultSimdFloat;
F result = dispenso::fast_math::sin(F(1.0f));
```

## Accuracy Traits

All functions accept an optional `AccuracyTraits` template parameter:

| Trait | `kMaxAccuracy` | `kBoundsValues` | Description |
|:------|:-:|:-:|:---|
| `DefaultAccuracyTraits` | false | false | Fastest. Correct over normal-range inputs. |
| `MaxAccuracyTraits` | true | true | Highest accuracy + edge-case handling (NaN, Inf, denorm). |

Custom traits can mix these independently:

```cpp
struct BoundsOnly {
  static constexpr bool kMaxAccuracy = false;
  static constexpr bool kBoundsValues = true; // Handle NaN/Inf, keep fast polynomials
};

float y = dfm::exp<BoundsOnly>(x);
```

## SIMD Support

### Supported Types

| Backend | Type | Width | Guard Macro |
|:--------|:-----|:-----:|:------------|
| Scalar | `float` | 1 | always |
| SSE4.1 | `__m128` | 4 | `__SSE4_1__` |
| AVX2 | `__m256` | 8 | `__AVX2__` |
| AVX-512 | `__m512` | 16 | `__AVX512F__` |
| NEON | `float32x4_t` | 4 | `__aarch64__` |
| Highway | `HwyFloat` | variable | `__has_include("hwy/highway.h")` |

All SIMD backends provide `FloatTraits` specializations with the required operations (`fma`, `sqrt`, `conditional`, `bit_cast`, etc.). Include `dispenso/fast_math/simd.h` and the appropriate backend headers are auto-included based on platform macros.

### How It Works

Every function is templated on `Flt`. When `Flt` is a SIMD vector type, all operations (arithmetic, comparisons, bit manipulation) dispatch through `FloatTraits` specializations that map to the corresponding intrinsics. Polynomial coefficients stay as scalar `float` arrays — the implicit `SseFloat(float)` / `_mm256_set1_ps` broadcast is efficient on modern hardware.
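As an illustrative sketch (not a library API), the same call site can serve both the vector body and the scalar tail of a loop. This assumes an AVX2 build and the `__m256` usage shown in Quick Start; the helper name `fast_log_inplace` is made up for the example:

```cpp
#include <immintrin.h>
#include <cstddef>

#include <dispenso/fast_math/fast_math.h>

namespace dfm = dispenso::fast_math;

// Illustrative helper (not part of dispenso): apply log() in place,
// 8 lanes at a time on AVX2, with a scalar loop for the remainder.
void fast_log_inplace(float* data, std::size_t n) {
  std::size_t i = 0;
#if defined(__AVX2__)
  for (; i + 8 <= n; i += 8) {
    __m256 v = _mm256_loadu_ps(data + i); // unaligned load of 8 floats
    __m256 r = dfm::log(v);               // same entry point as the scalar call
    _mm256_storeu_ps(data + i, r);
  }
#endif
  for (; i < n; ++i) {
    data[i] = dfm::log(data[i]); // scalar drop-in for std::log
  }
}
```
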
## Function Reference ### Trigonometric | Function | Signature | Domain | Max ULP (Default) | Max ULP (MaxAccuracy) | |:---------|:----------|:-------|:--:|:--:| | `sin` | `sin(x)` | all float | 1 ([-128pi,128pi]), 2 ([-1Mpi,1Mpi]) | 1 ([-1Mpi,1Mpi]), 2 (full) | | `cos` | `cos(x)` | all float | 1 ([-128pi,128pi]), 2 ([-1Mpi,1Mpi]) | 1 ([-128pi,128pi]), 2 ([-1Mpi,1Mpi]) | | `tan` | `tan(x)` | all float | 3 ([-128Kpi,128Kpi]), 32 ([-1Mpi,1Mpi]) | 3 ([-128Kpi,128Kpi]), 4 ([-1Mpi,1Mpi]) | ### Inverse Trigonometric | Function | Signature | Domain | Max ULP | |:---------|:----------|:-------|:--:| | `acos` | `acos(x)` | [-1, 1] | 3 | | `asin` | `asin(x)` | [-1, 1] | 2 | | `atan` | `atan(x)` | all float | 3 | | `atan2` | `atan2(y, x)` | all float | 3 (Default), 3 (MaxAccuracy, +Inf handling) | ### Exponential | Function | Signature | Domain | Max ULP (Default) | Max ULP (MaxAccuracy) | |:---------|:----------|:-------|:--:|:--:| | `exp` | `exp(x)` | [-89, 89] | 3 | 1 | | `exp2` | `exp2(x)` | [-127, 128] | 1 | 1 | | `exp10` | `exp10(x)` | [-40, 40] | 2 | 2 | | `expm1` | `expm1(x)` | all float | 2 | 1 | With `kBoundsValues = true`, exp/exp2/exp10 correctly handle NaN, +/-Inf, and out-of-range inputs (returning 0 for large negative, Inf for large positive). `expm1` computes exp(x) - 1 with precision near zero using Cody-Waite range reduction and Sollya-optimized polynomials. For x < -17, returns -1 exactly (exp(x) is below float ULP of 1). ### Logarithmic | Function | Signature | Domain | Max ULP (Default) | Max ULP (MaxAccuracy) | |:---------|:----------|:-------|:--:|:--:| | `log` | `log(x)` | (0, +Inf) | 2 | 2 | | `log1p` | `log1p(x)` | (-1, +Inf) | 2 | 2 | | `log2` | `log2(x)` | (0, +Inf) | 1 | 1 | | `log10` | `log10(x)` | (0, +Inf) | 3 | 3 | With `kBoundsValues = true`, log/log2/log10 correctly handle 0, denormals, Inf, and NaN inputs. `log1p` computes log(1 + x) with precision near zero using compensated addition: captures the rounding error of 1 + x and applies a first-order correction. Bounds handling inherited from log. ### Other | Function | Signature | Domain | Max ULP (Default) | Max ULP (MaxAccuracy) | |:---------|:----------|:-------|:--:|:--:| | `sqrt` | `sqrt(x)` | all float | 0 | 0 | | `cbrt` | `cbrt(x)` | all float | 12 | 3 | | `hypot` | `hypot(x, y)` | all float | 1 | 1 | | `pow` | `pow(x, y)` | all float | 1 | 1 | | `tanh` | `tanh(x)` | all float | 2 | 2 | | `erf` | `erf(x)` | all float | 2 | 2 | | `frexp` | `frexp(x, &e)` | all float | 0 | 0 | | `ldexp` | `ldexp(x, e)` | all float | 0 | 0 | `sqrt` delegates to hardware `sqrt`. `frexp` and `ldexp` are bit-accurate. `hypot` uses double-precision cast for scalar and dynamic exponent scaling with Newton refinement for SIMD (~1 ULP). Overflow-safe for all normal floats (up to ~2^126). With `kBoundsValues = true`, hypot correctly handles IEEE 754 boundary conditions: `hypot(±inf, y) = +inf` even when `y` is NaN, and `hypot(NaN, finite) = NaN`. `pow` uses double-precision arithmetic internally for ~1 ULP across all SIMD backends. Scalar uses a table-based double core; SIMD uses width-dependent dispatch (table-assisted for ≤4 lanes, fully tableless for ≥8 lanes). With `kBoundsValues = true`, handles all IEEE 754 special cases (negative base, zero, inf, NaN, subnormals). `tanh` uses `expm1(2x) / (expm1(2x) + 2)` with a Sollya-optimized polynomial fast-path for scalar |x| < 1. Saturates to ±1 for |x| > 10. `erf` uses an Abramowitz & Stegun 7.1.26-inspired t-substitution with Sollya-optimized coefficients (p=0.45). 
Two domains: pure polynomial for |x| < 0.875, erfc formula with inline exp(-x²) for |x| ∈ [0.875, 3.92]. NaN propagates naturally via `clamp_allow_nan` (no `kBoundsValues` overhead). Default and MaxAccuracy are identical. ## Performance Speedup ratios relative to platform libc (scalar) or scalar fast_math (SIMD). Absolute throughput varies by clock speed and microarchitecture; ratios are more meaningful for comparison. ### Linux x86-64 — AMD EPYC Genoa 166 cores, clang 21.1.7, `-O2 -march=native`, CMake Release build. #### Scalar: fast_math vs glibc | Function | Speedup | Function | Speedup | |:---------|:-:|:---------|:-:| | sin | 1.6x | exp | 3.8x | | cos | 1.6x | exp2 | 2.7x | | tan | 2.9x | exp10 | 2.5x | | acos | 2.2x | log | 3.4x | | asin | 3.4x | log2 | 3.0x | | atan | 1.7x | log10 | 3.3x | | atan2 | 3.3x | cbrt | 5.6x | | frexp | 3.8x | ldexp | 5.8x | | hypot | 2.3x | pow | 1.2x | | expm1 | 2.4x | log1p | 1.4x | | tanh | 3.0x | | | | erf | 1.6x | | | #### SIMD Scaling (per-element throughput relative to scalar fast_math) | Function | SSE (4) | AVX (8) | AVX-512 (16) | Highway (16) | |:---------|:-:|:-:|:-:|:-:| | sin | 2.7x | 5.4x | 6.5x | 5.8x | | cos | 2.6x | 5.2x | 6.5x | 5.7x | | tan | 3.6x | 7.0x | 8.7x | 7.7x | | acos | 4.9x | 9.8x | 10.1x | 10.2x | | asin | 1.9x | 3.8x | 4.3x | 4.3x | | atan | 4.8x | 9.5x | 11.9x | 11.9x | | atan2 | 3.5x | 7.0x | 9.6x | 9.6x | | exp | 3.7x | 7.4x | 7.7x | 7.3x | | exp2 | 3.7x | 7.5x | 8.8x | 7.2x | | exp10 | 3.7x | 7.3x | 8.6x | 7.1x | | log | 3.7x | 7.4x | 8.8x | 8.8x | | log2 | 3.6x | 7.3x | 9.6x | 9.6x | | log10 | 3.7x | 7.4x | 9.2x | 9.2x | | cbrt | 2.9x | 5.7x | 7.0x | 7.1x | | frexp | 7.4x | 14.8x | 23.0x | 22.9x | | ldexp | 4.8x | 9.7x | 16.6x | 16.6x | | hypot | 3.6x | — | 9.9x | — | | pow | — | 3.6x | 7.2x | — | | expm1 | 2.7x | 5.4x | 10.4x | 9.4x | | log1p | 3.0x | 6.0x | 12.5x | 12.4x | | tanh | 3.0x | 6.1x | 12.2x | 10.1x | | erf | 2.4x | 5.1x | 5.5x | 4.9x | Highway dispatches to AVX-512 (16-lane) on this hardware. #### Highway vs hwy::contrib (AVX-512 dispatch) | Function | Ratio | Function | Ratio | |:---------|:-:|:---------|:-:| | sin | 0.76x | log | 1.60x | | cos | 0.74x | log2 | 1.62x | | exp | 1.63x | log10 | 1.67x | | exp2 | 2.21x | atan | 1.12x | | acos | 1.63x | asin | 0.87x | | atan2 | 1.67x | | | fast_math is 1.6-2.2x faster for exp/log family, comparable for inverse trig, and ~25% slower for sin/cos (contrib uses a different polynomial fit). #### MaxAccuracyTraits overhead (SSE4.1) | Function | Overhead | Function | Overhead | |:---------|:-:|:---------|:-:| | sin | ~0% | log | 46% | | cos | ~0% | cbrt | 24% | | exp | 37% | hypot | 18% (SSE), 29% (AVX-512) | ### Mac ARM — Apple M4 Pro 12 cores, 48 GB, Apple clang, `-O2`. #### Scalar: fast_math vs Apple libm | Function | Speedup | Function | Speedup | |:---------|:-:|:---------|:-:| | sin | 1.0x | exp | 1.7x | | cos | 1.4x | exp2 | 1.7x | | tan | 1.2x | exp10 | 1.6x | | acos | 1.6x | log | 1.6x | | asin | 1.8x | log2 | 1.0x | | atan | 1.8x | log10 | 1.6x | | atan2 | 1.7x | cbrt | 2.6x | | frexp | 1.4x | ldexp | 2.0x | | hypot | 1.6x | | | Apple's libm is highly optimized for M-series silicon; the smaller speedups reflect a stronger baseline rather than slower fast_math. 
#### SIMD Scaling (per-element throughput relative to scalar fast_math) | Function | NEON (4) | Highway (4) | |:---------|:-:|:-:| | sin | 5.4x | 4.9x | | cos | 3.7x | 3.5x | | tan | 4.6x | 4.1x | | acos | 4.8x | 3.5x | | asin | 2.8x | 2.9x | | atan | 3.4x | 2.3x | | atan2 | 2.2x | 1.3x | | exp | 3.9x | 3.6x | | exp2 | 4.0x | 3.9x | | exp10 | 3.9x | 2.7x | | log | 4.4x | 2.7x | | log2 | 3.5x | 1.1x | | log10 | 2.5x | 1.6x | | cbrt | 3.1x | 2.4x | | frexp | 4.0x | 4.3x | | ldexp | 4.0x | 4.0x | | hypot | 2.1x | — | Highway dispatches to NEON (4-lane) on AArch64. NEON backend is generally faster than Highway on ARM due to lower abstraction overhead. ================================================ FILE: dispenso/fast_math/detail/double_promote.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ // DoubleVec: a pair of double-precision vectors covering all lanes of Flt. // // Provides the minimal set of operations needed for the tableless // double-precision pow core: widen, narrow, arithmetic, FMA, and // exp2 range-reduction helpers. // // Specialized for each SIMD backend alongside its float type: // float → double (scalar) // SseFloat → __m128d lo, hi // AvxFloat → __m256d lo, hi // Avx512Float → __m512d lo, hi // NeonFloat → float64x2_t lo, hi // HwyFloat → Vec> lo, hi #pragma once #include #include #include #include namespace dispenso { namespace fast_math { namespace detail { // Primary template — undefined; each backend specializes. template struct DoubleVec; // --------------------------------------------------------------------------- // Scalar: DoubleVec is just a double. // --------------------------------------------------------------------------- template <> struct DoubleVec { double v; DoubleVec() = default; explicit DoubleVec(double d) : v(d) {} static DISPENSO_INLINE DoubleVec from_float(float f) { return DoubleVec{static_cast(f)}; } // Gather: load table[index] as a double. static DISPENSO_INLINE DoubleVec gather(const double* base, int32_t index) { return DoubleVec{base[index]}; } DISPENSO_INLINE float to_float() const { return static_cast(v); } friend DISPENSO_INLINE DoubleVec operator+(DoubleVec a, DoubleVec b) { return DoubleVec{a.v + b.v}; } friend DISPENSO_INLINE DoubleVec operator-(DoubleVec a, DoubleVec b) { return DoubleVec{a.v - b.v}; } friend DISPENSO_INLINE DoubleVec operator*(DoubleVec a, DoubleVec b) { return DoubleVec{a.v * b.v}; } friend DISPENSO_INLINE DoubleVec fma(DoubleVec a, DoubleVec b, DoubleVec c) { return DoubleVec{std::fma(a.v, b.v, c.v)}; } friend DISPENSO_INLINE DoubleVec clamp(DoubleVec x, DoubleVec low, DoubleVec high) { return DoubleVec{x.v < low.v ? low.v : (x.v > high.v ? high.v : x.v)}; } }; // exp2 range reduction: free function, defined after DoubleVec is complete. // Splits x into integer n and fractional r ∈ [-0.5, 0.5]. // Returns {r, scale} where scale = 2^n. // Boundary cases (from ylogx clamped to [-1022, 1023]): // n = 1024 → scale = +inf (0x7FF0...), result overflows to +inf — correct. // n = -1023 → scale = 0.0, result underflows to 0 — correct. 
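// Worked example: x = 3.3 → shifted = kShift + 3 after round-to-nearest, so n = 3,
// nd = 3.0, r = 0.3, and scale = bit_cast<double>(uint64_t(3 + 1023) << 52) = 2^3 = 8.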
DISPENSO_INLINE std::pair, DoubleVec> exp2_split(DoubleVec x) { using DV = DoubleVec; constexpr double kShift = 0x1.8p+52; constexpr uint64_t kShiftBits = 0x4338000000000000ULL; double shifted = x.v + kShift; uint64_t ki = bit_cast(shifted); double nd = shifted - kShift; double r = x.v - nd; int64_t n = static_cast(ki - kShiftBits); double scale = bit_cast(static_cast(n + 1023) << 52); return {DV{r}, DV{scale}}; } // --------------------------------------------------------------------------- // SSE: DoubleVec holds two __m128d (4 float lanes → 2×2 doubles). // --------------------------------------------------------------------------- #if defined(__SSE4_1__) template <> struct DoubleVec { __m128d lo, hi; // lo = lanes 0,1; hi = lanes 2,3 DoubleVec() = default; DoubleVec(__m128d l, __m128d h) : lo(l), hi(h) {} explicit DoubleVec(double d) : lo(_mm_set1_pd(d)), hi(_mm_set1_pd(d)) {} static DISPENSO_INLINE DoubleVec from_float(SseFloat f) { return {_mm_cvtps_pd(f.v), _mm_cvtps_pd(_mm_movehl_ps(f.v, f.v))}; } // Gather: load base[idx[lane]] for each of 4 lanes via scalar lookups. // Used for small tables that fit in L1 (e.g. 16-entry pow log2 table). static DISPENSO_INLINE DoubleVec gather(const double* base, SseInt32 idx) { int32_t i0 = _mm_extract_epi32(idx.v, 0); int32_t i1 = _mm_extract_epi32(idx.v, 1); int32_t i2 = _mm_extract_epi32(idx.v, 2); int32_t i3 = _mm_extract_epi32(idx.v, 3); return {_mm_set_pd(base[i1], base[i0]), _mm_set_pd(base[i3], base[i2])}; } DISPENSO_INLINE SseFloat to_float() const { __m128 lo_f = _mm_cvtpd_ps(lo); __m128 hi_f = _mm_cvtpd_ps(hi); return SseFloat{_mm_movelh_ps(lo_f, hi_f)}; } friend DISPENSO_INLINE DoubleVec operator+(DoubleVec a, DoubleVec b) { return {_mm_add_pd(a.lo, b.lo), _mm_add_pd(a.hi, b.hi)}; } friend DISPENSO_INLINE DoubleVec operator-(DoubleVec a, DoubleVec b) { return {_mm_sub_pd(a.lo, b.lo), _mm_sub_pd(a.hi, b.hi)}; } friend DISPENSO_INLINE DoubleVec operator*(DoubleVec a, DoubleVec b) { return {_mm_mul_pd(a.lo, b.lo), _mm_mul_pd(a.hi, b.hi)}; } friend DISPENSO_INLINE DoubleVec fma(DoubleVec a, DoubleVec b, DoubleVec c) { #if defined(__FMA__) return {_mm_fmadd_pd(a.lo, b.lo, c.lo), _mm_fmadd_pd(a.hi, b.hi, c.hi)}; #else return {_mm_add_pd(_mm_mul_pd(a.lo, b.lo), c.lo), _mm_add_pd(_mm_mul_pd(a.hi, b.hi), c.hi)}; #endif } friend DISPENSO_INLINE DoubleVec clamp(DoubleVec x, DoubleVec low, DoubleVec high) { return { _mm_min_pd(_mm_max_pd(x.lo, low.lo), high.lo), _mm_min_pd(_mm_max_pd(x.hi, low.hi), high.hi)}; } }; DISPENSO_INLINE std::pair, DoubleVec> exp2_split( DoubleVec x) { using DV = DoubleVec; __m128d kShift = _mm_set1_pd(0x1.8p+52); __m128i kShiftBits = _mm_set1_epi64x(0x4338000000000000LL); __m128i k1023 = _mm_set1_epi64x(1023); __m128d shifted_lo = _mm_add_pd(x.lo, kShift); __m128d shifted_hi = _mm_add_pd(x.hi, kShift); __m128i ki_lo = _mm_castpd_si128(shifted_lo); __m128i ki_hi = _mm_castpd_si128(shifted_hi); __m128d nd_lo = _mm_sub_pd(shifted_lo, kShift); __m128d nd_hi = _mm_sub_pd(shifted_hi, kShift); __m128d r_lo = _mm_sub_pd(x.lo, nd_lo); __m128d r_hi = _mm_sub_pd(x.hi, nd_hi); __m128i n_lo = _mm_sub_epi64(ki_lo, kShiftBits); __m128i n_hi = _mm_sub_epi64(ki_hi, kShiftBits); __m128d scale_lo = _mm_castsi128_pd(_mm_slli_epi64(_mm_add_epi64(n_lo, k1023), 52)); __m128d scale_hi = _mm_castsi128_pd(_mm_slli_epi64(_mm_add_epi64(n_hi, k1023), 52)); return {DV{r_lo, r_hi}, DV{scale_lo, scale_hi}}; } #endif // __SSE4_1__ // --------------------------------------------------------------------------- // AVX: DoubleVec holds two __m256d (8 float 
lanes → 2×4 doubles). // --------------------------------------------------------------------------- #if defined(__AVX2__) template <> struct DoubleVec { __m256d lo, hi; // lo = lanes 0-3; hi = lanes 4-7 DoubleVec() = default; DoubleVec(__m256d l, __m256d h) : lo(l), hi(h) {} explicit DoubleVec(double d) : lo(_mm256_set1_pd(d)), hi(_mm256_set1_pd(d)) {} static DISPENSO_INLINE DoubleVec from_float(AvxFloat f) { return { _mm256_cvtps_pd(_mm256_castps256_ps128(f.v)), _mm256_cvtps_pd(_mm256_extractf128_ps(f.v, 1))}; } // Gather: AVX2 has native 4-wide double gather from int32 indices. // Two gathers cover all 8 lanes. The table is 256 bytes (4 cachelines), // likely L1-hot after the first call. static DISPENSO_INLINE DoubleVec gather(const double* base, AvxInt32 idx) { __m128i lo4 = _mm256_castsi256_si128(idx.v); __m128i hi4 = _mm256_extracti128_si256(idx.v, 1); return {_mm256_i32gather_pd(base, lo4, 8), _mm256_i32gather_pd(base, hi4, 8)}; } DISPENSO_INLINE AvxFloat to_float() const { __m128 lo_f = _mm256_cvtpd_ps(lo); __m128 hi_f = _mm256_cvtpd_ps(hi); return AvxFloat{_mm256_set_m128(hi_f, lo_f)}; } friend DISPENSO_INLINE DoubleVec operator+(DoubleVec a, DoubleVec b) { return {_mm256_add_pd(a.lo, b.lo), _mm256_add_pd(a.hi, b.hi)}; } friend DISPENSO_INLINE DoubleVec operator-(DoubleVec a, DoubleVec b) { return {_mm256_sub_pd(a.lo, b.lo), _mm256_sub_pd(a.hi, b.hi)}; } friend DISPENSO_INLINE DoubleVec operator*(DoubleVec a, DoubleVec b) { return {_mm256_mul_pd(a.lo, b.lo), _mm256_mul_pd(a.hi, b.hi)}; } friend DISPENSO_INLINE DoubleVec fma(DoubleVec a, DoubleVec b, DoubleVec c) { #if defined(__FMA__) return {_mm256_fmadd_pd(a.lo, b.lo, c.lo), _mm256_fmadd_pd(a.hi, b.hi, c.hi)}; #else return { _mm256_add_pd(_mm256_mul_pd(a.lo, b.lo), c.lo), _mm256_add_pd(_mm256_mul_pd(a.hi, b.hi), c.hi)}; #endif } friend DISPENSO_INLINE DoubleVec clamp(DoubleVec x, DoubleVec low, DoubleVec high) { return { _mm256_min_pd(_mm256_max_pd(x.lo, low.lo), high.lo), _mm256_min_pd(_mm256_max_pd(x.hi, low.hi), high.hi)}; } }; DISPENSO_INLINE std::pair, DoubleVec> exp2_split( DoubleVec x) { using DV = DoubleVec; __m256d kShift = _mm256_set1_pd(0x1.8p+52); __m256i kShiftBits = _mm256_set1_epi64x(0x4338000000000000LL); __m256i k1023 = _mm256_set1_epi64x(1023); __m256d shifted_lo = _mm256_add_pd(x.lo, kShift); __m256d shifted_hi = _mm256_add_pd(x.hi, kShift); __m256i ki_lo = _mm256_castpd_si256(shifted_lo); __m256i ki_hi = _mm256_castpd_si256(shifted_hi); __m256d nd_lo = _mm256_sub_pd(shifted_lo, kShift); __m256d nd_hi = _mm256_sub_pd(shifted_hi, kShift); __m256d r_lo = _mm256_sub_pd(x.lo, nd_lo); __m256d r_hi = _mm256_sub_pd(x.hi, nd_hi); __m256i n_lo = _mm256_sub_epi64(ki_lo, kShiftBits); __m256i n_hi = _mm256_sub_epi64(ki_hi, kShiftBits); __m256d scale_lo = _mm256_castsi256_pd(_mm256_slli_epi64(_mm256_add_epi64(n_lo, k1023), 52)); __m256d scale_hi = _mm256_castsi256_pd(_mm256_slli_epi64(_mm256_add_epi64(n_hi, k1023), 52)); return {DV{r_lo, r_hi}, DV{scale_lo, scale_hi}}; } #endif // __AVX2__ // --------------------------------------------------------------------------- // AVX-512: DoubleVec holds two __m512d (16 floats → 2×8 doubles). 
// --------------------------------------------------------------------------- #if defined(__AVX512F__) template <> struct DoubleVec { __m512d lo, hi; // lo = lanes 0-7; hi = lanes 8-15 DoubleVec() = default; DoubleVec(__m512d l, __m512d h) : lo(l), hi(h) {} explicit DoubleVec(double d) : lo(_mm512_set1_pd(d)), hi(_mm512_set1_pd(d)) {} static DISPENSO_INLINE DoubleVec from_float(Avx512Float f) { __m256 lo8 = _mm512_castps512_ps256(f.v); // Extract upper 8 floats without requiring AVX-512 DQ: // cast to __m512i, extract upper 256-bit int lane, reinterpret as __m256. __m256 hi8 = _mm256_castsi256_ps(_mm512_extracti64x4_epi64(_mm512_castps_si512(f.v), 1)); return {_mm512_cvtps_pd(lo8), _mm512_cvtps_pd(hi8)}; } // Gather: AVX-512 has native 8-wide double gather from int32 indices. // Two gathers cover all 16 lanes. static DISPENSO_INLINE DoubleVec gather(const double* base, Avx512Int32 idx) { __m256i lo8 = _mm512_castsi512_si256(idx.v); __m256i hi8 = _mm512_extracti64x4_epi64(idx.v, 1); return {_mm512_i32gather_pd(lo8, base, 8), _mm512_i32gather_pd(hi8, base, 8)}; } DISPENSO_INLINE Avx512Float to_float() const { __m256 lo_f = _mm512_cvtpd_ps(lo); __m256 hi_f = _mm512_cvtpd_ps(hi); // Combine without AVX-512 DQ: cast to int, insert, cast back. __m512i combined = _mm512_inserti64x4( _mm512_castsi256_si512(_mm256_castps_si256(lo_f)), _mm256_castps_si256(hi_f), 1); return Avx512Float{_mm512_castsi512_ps(combined)}; } friend DISPENSO_INLINE DoubleVec operator+(DoubleVec a, DoubleVec b) { return {_mm512_add_pd(a.lo, b.lo), _mm512_add_pd(a.hi, b.hi)}; } friend DISPENSO_INLINE DoubleVec operator-(DoubleVec a, DoubleVec b) { return {_mm512_sub_pd(a.lo, b.lo), _mm512_sub_pd(a.hi, b.hi)}; } friend DISPENSO_INLINE DoubleVec operator*(DoubleVec a, DoubleVec b) { return {_mm512_mul_pd(a.lo, b.lo), _mm512_mul_pd(a.hi, b.hi)}; } // AVX-512 always has FMA. friend DISPENSO_INLINE DoubleVec fma(DoubleVec a, DoubleVec b, DoubleVec c) { return {_mm512_fmadd_pd(a.lo, b.lo, c.lo), _mm512_fmadd_pd(a.hi, b.hi, c.hi)}; } friend DISPENSO_INLINE DoubleVec clamp(DoubleVec x, DoubleVec low, DoubleVec high) { return { _mm512_min_pd(_mm512_max_pd(x.lo, low.lo), high.lo), _mm512_min_pd(_mm512_max_pd(x.hi, low.hi), high.hi)}; } }; DISPENSO_INLINE std::pair, DoubleVec> exp2_split( DoubleVec x) { using DV = DoubleVec; __m512d kShift = _mm512_set1_pd(0x1.8p+52); __m512i kShiftBits = _mm512_set1_epi64(0x4338000000000000LL); __m512i k1023 = _mm512_set1_epi64(1023); __m512d shifted_lo = _mm512_add_pd(x.lo, kShift); __m512d shifted_hi = _mm512_add_pd(x.hi, kShift); __m512i ki_lo = _mm512_castpd_si512(shifted_lo); __m512i ki_hi = _mm512_castpd_si512(shifted_hi); __m512d nd_lo = _mm512_sub_pd(shifted_lo, kShift); __m512d nd_hi = _mm512_sub_pd(shifted_hi, kShift); __m512d r_lo = _mm512_sub_pd(x.lo, nd_lo); __m512d r_hi = _mm512_sub_pd(x.hi, nd_hi); __m512i n_lo = _mm512_sub_epi64(ki_lo, kShiftBits); __m512i n_hi = _mm512_sub_epi64(ki_hi, kShiftBits); __m512d scale_lo = _mm512_castsi512_pd(_mm512_slli_epi64(_mm512_add_epi64(n_lo, k1023), 52)); __m512d scale_hi = _mm512_castsi512_pd(_mm512_slli_epi64(_mm512_add_epi64(n_hi, k1023), 52)); return {DV{r_lo, r_hi}, DV{scale_lo, scale_hi}}; } #endif // __AVX512F__ // --------------------------------------------------------------------------- // NEON: DoubleVec holds two float64x2_t (4 floats → 2×2 doubles). 
// --------------------------------------------------------------------------- #if defined(__aarch64__) template <> struct DoubleVec { float64x2_t lo, hi; // lo = lanes 0,1; hi = lanes 2,3 DoubleVec() = default; DoubleVec(float64x2_t l, float64x2_t h) : lo(l), hi(h) {} explicit DoubleVec(double d) : lo(vdupq_n_f64(d)), hi(vdupq_n_f64(d)) {} static DISPENSO_INLINE DoubleVec from_float(NeonFloat f) { return {vcvt_f64_f32(vget_low_f32(f.v)), vcvt_f64_f32(vget_high_f32(f.v))}; } // Gather: load base[idx[lane]] for each of 4 lanes via scalar lookups. static DISPENSO_INLINE DoubleVec gather(const double* base, NeonInt32 idx) { int32_t i0 = vgetq_lane_s32(idx.v, 0); int32_t i1 = vgetq_lane_s32(idx.v, 1); int32_t i2 = vgetq_lane_s32(idx.v, 2); int32_t i3 = vgetq_lane_s32(idx.v, 3); float64x2_t lo_d = vsetq_lane_f64(base[i1], vdupq_n_f64(base[i0]), 1); float64x2_t hi_d = vsetq_lane_f64(base[i3], vdupq_n_f64(base[i2]), 1); return {lo_d, hi_d}; } DISPENSO_INLINE NeonFloat to_float() const { float32x2_t lo_f = vcvt_f32_f64(lo); float32x2_t hi_f = vcvt_f32_f64(hi); return NeonFloat{vcombine_f32(lo_f, hi_f)}; } friend DISPENSO_INLINE DoubleVec operator+(DoubleVec a, DoubleVec b) { return {vaddq_f64(a.lo, b.lo), vaddq_f64(a.hi, b.hi)}; } friend DISPENSO_INLINE DoubleVec operator-(DoubleVec a, DoubleVec b) { return {vsubq_f64(a.lo, b.lo), vsubq_f64(a.hi, b.hi)}; } friend DISPENSO_INLINE DoubleVec operator*(DoubleVec a, DoubleVec b) { return {vmulq_f64(a.lo, b.lo), vmulq_f64(a.hi, b.hi)}; } // AArch64 always has FMA. friend DISPENSO_INLINE DoubleVec fma(DoubleVec a, DoubleVec b, DoubleVec c) { return {vfmaq_f64(c.lo, a.lo, b.lo), vfmaq_f64(c.hi, a.hi, b.hi)}; } friend DISPENSO_INLINE DoubleVec clamp(DoubleVec x, DoubleVec low, DoubleVec high) { return { vminnmq_f64(vmaxnmq_f64(x.lo, low.lo), high.lo), vminnmq_f64(vmaxnmq_f64(x.hi, low.hi), high.hi)}; } }; DISPENSO_INLINE std::pair, DoubleVec> exp2_split( DoubleVec x) { using DV = DoubleVec; float64x2_t kShift = vdupq_n_f64(0x1.8p+52); float64x2_t shifted_lo = vaddq_f64(x.lo, kShift); float64x2_t shifted_hi = vaddq_f64(x.hi, kShift); uint64x2_t ki_lo = vreinterpretq_u64_f64(shifted_lo); uint64x2_t ki_hi = vreinterpretq_u64_f64(shifted_hi); float64x2_t nd_lo = vsubq_f64(shifted_lo, kShift); float64x2_t nd_hi = vsubq_f64(shifted_hi, kShift); float64x2_t r_lo = vsubq_f64(x.lo, nd_lo); float64x2_t r_hi = vsubq_f64(x.hi, nd_hi); uint64x2_t kShiftBits = vdupq_n_u64(0x4338000000000000ULL); uint64x2_t k1023 = vdupq_n_u64(1023); uint64x2_t n_lo = vsubq_u64(ki_lo, kShiftBits); uint64x2_t n_hi = vsubq_u64(ki_hi, kShiftBits); float64x2_t scale_lo = vreinterpretq_f64_u64(vshlq_n_u64(vaddq_u64(n_lo, k1023), 52)); float64x2_t scale_hi = vreinterpretq_f64_u64(vshlq_n_u64(vaddq_u64(n_hi, k1023), 52)); return {DV{r_lo, r_hi}, DV{scale_lo, scale_hi}}; } #endif // __aarch64__ // --------------------------------------------------------------------------- // Highway: DoubleVec holds two Vec>. // Width adapts to the compile-time target (SSE→2×2, AVX→2×4, AVX-512→2×8). 
// --------------------------------------------------------------------------- #if __has_include("hwy/highway.h") #include template <> struct DoubleVec { using DTag = hn::Repartition; using DV = hn::Vec; using ITag = hn::RebindToSigned; DV lo, hi; // lo = lower half lanes, hi = upper half lanes DoubleVec() = default; DoubleVec(DV l, DV h) : lo(l), hi(h) {} explicit DoubleVec(double d) : lo(hn::Set(DTag{}, d)), hi(hn::Set(DTag{}, d)) {} static DISPENSO_INLINE DoubleVec from_float(HwyFloat f) { const DTag dd; return {hn::PromoteLowerTo(dd, f.v), hn::PromoteUpperTo(dd, f.v)}; } // Gather: load base[idx[lane]] for each lane via scalar lookups. // Highway lacks a double-precision gather from int32 indices, so we // extract indices and load scalars. The table is small and L1-hot. static DISPENSO_INLINE DoubleVec gather(const double* base, HwyInt32 idx) { const DTag dd; constexpr size_t kMaxF = HWY_MAX_BYTES / sizeof(float); constexpr size_t kMaxD = kMaxF / 2; // half as many double lanes HWY_ALIGN int32_t ibuf[kMaxF]; hn::StoreU(idx.v, HwyInt32Tag{}, ibuf); HWY_ALIGN double lo_buf[kMaxD]; HWY_ALIGN double hi_buf[kMaxD]; const size_t nd = hn::Lanes(dd); for (size_t j = 0; j < nd; ++j) { lo_buf[j] = base[ibuf[j]]; hi_buf[j] = base[ibuf[nd + j]]; } return {hn::Load(dd, lo_buf), hn::Load(dd, hi_buf)}; } DISPENSO_INLINE HwyFloat to_float() const { const HwyFloatTag df; // DemoteTo narrows each double half to a half-width float vector. auto lo_f = hn::DemoteTo(hn::Rebind{}, lo); auto hi_f = hn::DemoteTo(hn::Rebind{}, hi); // Combine: lo_f and hi_f are each half-width Vec; concatenate. return HwyFloat{hn::Combine(df, hi_f, lo_f)}; } friend DISPENSO_INLINE DoubleVec operator+(DoubleVec a, DoubleVec b) { return {hn::Add(a.lo, b.lo), hn::Add(a.hi, b.hi)}; } friend DISPENSO_INLINE DoubleVec operator-(DoubleVec a, DoubleVec b) { return {hn::Sub(a.lo, b.lo), hn::Sub(a.hi, b.hi)}; } friend DISPENSO_INLINE DoubleVec operator*(DoubleVec a, DoubleVec b) { return {hn::Mul(a.lo, b.lo), hn::Mul(a.hi, b.hi)}; } friend DISPENSO_INLINE DoubleVec fma(DoubleVec a, DoubleVec b, DoubleVec c) { return {hn::MulAdd(a.lo, b.lo, c.lo), hn::MulAdd(a.hi, b.hi, c.hi)}; } friend DISPENSO_INLINE DoubleVec clamp(DoubleVec x, DoubleVec low, DoubleVec high) { return {hn::Min(hn::Max(x.lo, low.lo), high.lo), hn::Min(hn::Max(x.hi, low.hi), high.hi)}; } }; DISPENSO_INLINE std::pair, DoubleVec> exp2_split( DoubleVec x) { using DVH = DoubleVec; using DTag = DVH::DTag; using ITag = DVH::ITag; auto kShift = hn::Set(DTag{}, 0x1.8p+52); auto kShiftBits = hn::Set(ITag{}, 0x4338000000000000LL); auto k1023 = hn::Set(ITag{}, int64_t{1023}); auto shifted_lo = hn::Add(x.lo, kShift); auto shifted_hi = hn::Add(x.hi, kShift); auto ki_lo = hn::BitCast(ITag{}, shifted_lo); auto ki_hi = hn::BitCast(ITag{}, shifted_hi); auto nd_lo = hn::Sub(shifted_lo, kShift); auto nd_hi = hn::Sub(shifted_hi, kShift); auto r_lo = hn::Sub(x.lo, nd_lo); auto r_hi = hn::Sub(x.hi, nd_hi); auto n_lo = hn::Sub(ki_lo, kShiftBits); auto n_hi = hn::Sub(ki_hi, kShiftBits); auto scale_lo = hn::BitCast(DTag{}, hn::ShiftLeft<52>(hn::Add(n_lo, k1023))); auto scale_hi = hn::BitCast(DTag{}, hn::ShiftLeft<52>(hn::Add(n_hi, k1023))); return {DVH{r_lo, r_hi}, DVH{scale_lo, scale_hi}}; } #endif // __has_include("hwy/highway.h") } // namespace detail } // namespace fast_math } // namespace dispenso ================================================ FILE: dispenso/fast_math/detail/fast_math_impl.h ================================================ /* * Copyright (c) Meta Platforms, Inc. 
and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #include #include "../util.h" namespace dispenso { namespace fast_math { namespace detail { // Constants and algorithms come from "Fast Calculation of Cube and Inverse Cube Roots Using a Magic // Constant and Its Implementation on Microcontrollers" by Moroz et al. template DISPENSO_INLINE Flt rcp_cbrt(Flt x, IntType_t i) { auto fma = FloatTraits::fma; if constexpr (AccuracyTraits::kMaxAccuracy) { // Algorithm 3 step 1 + FMA quadratic correction (2 ULP, division-free Halley). // // Step 1 uses Algorithm 3's magic constant and tuned Newton step (~10 bits). // Step 2 computes the FMA residual e = 1 - x*y^3 with single rounding (no // cancellation), then applies a Sollya-optimized quadratic correction: // y *= 1 + e*(c1 + c2*e) // This achieves cubic convergence (bits triple) without Halley's division, // at the same op count as the previous Algorithm 5 but with better accuracy. // // Polynomial: fpminimax for (1-e)^(-1/3), constant=1, on [-0.005, 0.005]. // Relative error ~1.08e-8 (~26.5 bits). constexpr float c1 = 0.3333354890346527099609375f; constexpr float c2 = 0.22222582995891571044921875f; i = 0x548c39cb - int_div_by_3(i); Flt y = bit_cast(i); y = y * fma(-0.534850249f * x * y, y * y, 1.5015480449f); Flt e = fma(x * y, -y * y, 1.0f); y = y * fma(e, fma(Flt(c2), e, Flt(c1)), 1.0f); return y; } else { // Algorithm 3 from Moroz et al. (12 ULP). i = 0x548c39cb - int_div_by_3(i); Flt y = bit_cast(i); y = y * fma(-0.534850249f * x * y, y * y, 1.5015480449f); y = y * fma(-0.3333333333f * x * y, y * y, 1.333333985f); return y; } } // TODO: We likely will never need a double precision version of this function, but note that this // is definitely busted for double. template DISPENSO_INLINE Flt frexpImpl(IntType_t hx, IntType_t* eptr) { using IntT = IntType_t; IntT ix = 0x7fffffff & hx; auto nonzero = hx != 0; *eptr = FloatTraits::conditional(nonzero, (ix >> 23) - 126, IntT(0)); return bit_cast((hx & 0x807fffff) | 0x3f000000); } template DISPENSO_INLINE Flt ldexpImpl(UintType_t hx, IntType_t e) { auto esgn = signofi(e); e *= esgn; hx += esgn * (e << 23); return bit_cast(hx); } template DISPENSO_INLINE Flt cos_pi_4(Flt x2) { // Approximate cosine on [-PI/4,+PI/4], max error 0.87444 ULP. // Polynomial in x²: c0 + c1*x² + c2*x⁴ + c3*x⁶ + c4*x⁸. return dispenso::fast_math::hornerEval( x2, 2.44677067e-5f, -1.38877297e-3f, 4.16666567e-2f, -5.00000000e-1f, 1.00000000e+0f); } template DISPENSO_INLINE Flt sin_pi_4(Flt x2, Flt x) { // Approximate sine on [-PI/4,+PI/4]. // Sollya fpminimax(sin(x) - x, [|3,5,7,9|], [|SG...|], [-pi/4;pi/4], absolute). // Odd polynomial: sin(x) = x + x³ * P(x²). Flt s = dispenso::fast_math::hornerEval( x2, 2.78547373e-6f, -1.98483889e-4f, 8.33336823e-3f, -1.66666672e-1f); return FloatTraits::fma(s, x * x2, x); } // Wider-domain sin/cos polynomials for pi-reduction (no quadrant blending needed). // sin(x) on [-pi/2, pi/2], degree 11 odd polynomial: sin(x) = x * P(x^2) // Sollya fpminimax(sin(x), [|1,3,5,7,9,11|], [|SG...|], [-pi/2;pi/2], absolute) // Absolute error: 3.2e-10 (31.5 bits) template DISPENSO_INLINE Flt sin_pi_2(Flt x2, Flt x) { // Odd polynomial: sin(x) = x + x³ * P(x²). 
Flt s = dispenso::fast_math::hornerEval( x2, -0x1.ab55a8p-26f, 0x1.725326p-19f, -0x1.a0205p-13f, 0x1.11112ep-7f, -0x1.555556p-3f); return FloatTraits::fma(s, x * x2, x); } template DISPENSO_INLINE Flt rangeReduceFloor(Flt& x, NonDeduced rcpMod, NonDeduced valHi, NonDeduced valLo) { auto j = floor_small(x * rcpMod); // FloatTraits::fma(x, rcpMod, kMagic) - kMagic; x = FloatTraits::fma(j, -valHi, x); x = FloatTraits::fma(j, -valLo, x); return j; } template DISPENSO_INLINE Flt rangeReduce(Flt& x, NonDeduced rcpMod, NonDeduced valHi, NonDeduced valLo) { const Flt kMagic = FloatTraits::kMagic; auto j = FloatTraits::fma(x, rcpMod, kMagic) - kMagic; x = FloatTraits::fma(j, -valHi, x); x = FloatTraits::fma(j, -valLo, x); return j; } template DISPENSO_INLINE Flt rangeReduce2( Flt& x, NonDeduced rcpMod, NonDeduced valHi, NonDeduced valMed, NonDeduced valLo) { const Flt kMagic = FloatTraits::kMagic; auto j = FloatTraits::fma(x, rcpMod, kMagic) - kMagic; x = FloatTraits::fma(j, -valHi, x); x = FloatTraits::fma(j, -valMed, x); x = FloatTraits::fma(j, -valLo, x); return j; } template DISPENSO_INLINE Flt rangeReduce3( Flt& x, NonDeduced rcpMod, NonDeduced valHi, NonDeduced valMedHi, NonDeduced valMedLo, NonDeduced valLo) { const Flt kMagic = FloatTraits::kMagic; auto j = FloatTraits::fma(x, rcpMod, kMagic) - kMagic; x = FloatTraits::fma(j, -valHi, x); x = FloatTraits::fma(j, -valMedHi, x); x = FloatTraits::fma(j, -valMedLo, x); x = FloatTraits::fma(j, -valLo, x); return j; } // https://stackoverflow.com/questions/63918873/approximating-cosine-on-0-pi-using-only-single-precision-floating-point // Cosine has quadrant bias 1, sine has quadrant bias 0 template DISPENSO_INLINE Flt sincos_pi_impl(Flt x, Flt j, int phaseShift) { /* phase shift of pi/2 (one quadrant) for cosine */ auto i = convert_to_int(j) + phaseShift; Flt r; if constexpr (std::is_same_v) { // For scalars, explicit if/else is faster to ensure // we avoid computing both functions if (i & 1) { r = cos_pi_4(x * x); } else { r = sin_pi_4(x * x, x); } } else { r = FloatTraits::conditional((i & 1) != 0, cos_pi_4(x * x), sin_pi_4(x * x, x)); } return FloatTraits::conditional((i & 2) != 0, -r, r); } // Combined sin+cos computation. Always evaluates both polynomials (needed for sincos). // For SIMD types, evaluates both polynomials and blends per-lane. template DISPENSO_INLINE void sincos_pi_impl_both(Flt x, Flt j, Flt* out_sin, Flt* out_cos) { auto i = convert_to_int(j); auto x2 = x * x; auto sv = sin_pi_4(x2, x); auto cv = cos_pi_4(x2); // For sin: quadrant i. For cos: quadrant i+1 (phase shift). auto sin_use_cos = (i & 1) != 0; auto cos_use_cos = ((i + 1) & 1) != 0; Flt sr = FloatTraits::conditional(sin_use_cos, cv, sv); Flt cr = FloatTraits::conditional(cos_use_cos, cv, sv); *out_sin = FloatTraits::conditional((i & 2) != 0, -sr, sr); *out_cos = FloatTraits::conditional(((i + 1) & 2) != 0, -cr, cr); } // Pi-reduction dispatch: each function evaluates only its own polynomial. // j = round(x / pi), so x_reduced in [-pi/2, pi/2]. // sin(x) = (-1)^j * sin(x_reduced), cos(x) = (-1)^j * cos(x_reduced). template DISPENSO_INLINE Flt sin_pi_reduction(Flt x, Flt j) { auto ji = convert_to_int(j); auto x2 = x * x; auto result = sin_pi_2(x2, x); return FloatTraits::conditional((ji & 1) != 0, -result, result); } // Cosine via offset-pi reduction (Highway-style). // q = 2*trunc(|x|/pi) + 1 maps cos zeros to x_r = 0. Then cos(x) = ±sin(x_r). // No quadrant blending: one polynomial, sign flip only. 
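// Worked example: x = 7.0 → q = 2*trunc(7.0/pi) + 1 = 5 and x_r = 7.0 - 5*pi/2 ≈ -0.854;
// q & 2 == 0, so the result is -sin(x_r) ≈ 0.754, matching cos(7.0).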
template DISPENSO_INLINE Flt cos_offset_pi_reduction(Flt x_r, IntType_t qi) { auto x2 = x_r * x_r; auto result = sin_pi_2(x2, x_r); // (q & 2) == 0 → negate (cos starts positive at q=1). return FloatTraits::conditional((qi & 2) != 0, result, -result); } template DISPENSO_INLINE Flt tan_pi_2_impl(Flt x, Flt j) { auto i = convert_to_int(j); auto x2 = x * x; auto cv = cos_pi_4(x2); auto sv = sin_pi_4(x2, x); auto flip = (i & 1) != 0; Flt r; if constexpr (std::is_same_v) { if (flip) { r = cv / sv; } else { r = sv / cv; } } else { auto n = FloatTraits::conditional(flip, cv, sv); auto d = FloatTraits::conditional(flip, sv, cv); r = n / d; } auto neg = ((i ^ (i + 1)) & 2) != 0; r = FloatTraits::conditional(neg, -r, r); return r; } template DISPENSO_INLINE Flt asin_0_pt5(Flt x) { // asin(x) = x + x * x² * P(x²), where P approximates (asin(x)-x)/x³ on [0, 0.5]. auto x2 = x * x; Flt px; if constexpr (AccuracyTraits::kMaxAccuracy || AccuracyTraits::kBoundsValues) { // Sollya fpminimax((asin(x)-x)/x^3, [|0,2,4,6,8,10|], [|SG...|], [1b-50, 0.5]): // sup-norm error < 2^-27. px = dispenso::fast_math::hornerEval( x2, 0x1.1a258cp-5f, 0x1.10abf6p-6f, 0x1.ff9494p-6f, 0x1.6d4032p-5f, 0x1.3334c8p-4f, 0x1.555554p-3f) * x2; } else { // Sollya fpminimax((asin(x)-x)/x^3, [|0,2,4,6,8|], [|SG...|], [1b-50, 0.5]): // sup-norm error < 2^-24. px = dispenso::fast_math::hornerEval( x2, 0x1.3926d2p-5f, 0x1.b1e3acp-6f, 0x1.70bf2ap-5f, 0x1.332688p-4f, 0x1.55555ep-3f) * x2; } return x + x * px; } template DISPENSO_INLINE Flt asin_pt5_1(Flt x) { // asin(x) = pi/2 + sqrt(1-x) * P(x), where P approximates (asin(x)-pi/2)/sqrt(1-x) // on [0.5, 1]. constexpr float kPi_2hi = 1.57079637050628662109375f; constexpr float kPi_2lo = -4.37113900018624283e-8f; Flt y; if constexpr (AccuracyTraits::kMaxAccuracy || AccuracyTraits::kBoundsValues) { // Sollya fpminimax((asin(x)-pi/2)/sqrt(1-x), 6, [|SG...|], [0.5, 1-1b-23]): // sup-norm error < 2^-32. y = dispenso::fast_math::hornerEval( x, -0x1.b7070cp-11f, 0x1.72345p-8f, -0x1.2f2922p-6f, 0x1.59358ep-5f, -0x1.5f9f5p-4f, 0x1.b6199ap-3f, -0x1.921b8p0f); } else { // Sollya fpminimax((asin(x)-pi/2)/sqrt(1-x), 5, [|SG...|], [0.5, 1-1b-23]): // sup-norm error < 2^-28. y = dispenso::fast_math::hornerEval( x, 0x1.edfaacp-10f, -0x1.79d524p-7f, 0x1.216da6p-5f, -0x1.507bb6p-4f, 0x1.b3ef5cp-3f, -0x1.921358p0f); } // Use compensating sum for better rounding. return kPi_2hi + FloatTraits::fma(FloatTraits::sqrt(1.0f - x), y, kPi_2lo); } template DISPENSO_INLINE std::tuple, Flt, Flt> logarithmSep(Flt& x) { using IntT = IntType_t; using UintT = UintType_t; IntT xi; Flt i; Flt m; auto fma = FloatTraits::fma; if constexpr (AccuracyTraits::kMaxAccuracy) { auto small = x < 1.175494351e-38f; x = FloatTraits::conditional(small, x * 8388608.0f, x); i = FloatTraits::conditional(small, Flt(-23.0f), Flt(0.0f)); xi = bit_cast(x); // Use unsigned arithmetic to avoid signed overflow UB on negative inputs. 
UintT xu = bit_cast(x); UintT e = (xu - UintT(0x3f3504f3)) & UintT(0xff800000); m = bit_cast(xu - e); i = fma(Flt(bit_cast(e)), 1.19209290e-7f, i); } else { // Ignore denorms, degradation behavior not that bad xi = bit_cast(x); UintT xu = bit_cast(x); UintT e = (xu - UintT(0x3f3504f3)) & UintT(0xff800000); m = bit_cast(xu - e); i = Flt(bit_cast(e)) * 1.19209290e-7f; } return {xi, i, m}; } template DISPENSO_INLINE Flt logarithmBounds(Flt x, Flt y, IntType_t xi) { using IntT = IntType_t; using UintT = UintType_t; // Negative-input detection uses unsigned comparison: negative float bit // patterns have the sign bit set, making them large unsigned values. Scalar // C++ promotes to unsigned implicitly, but SIMD integer comparison intrinsics // are always signed, so we use UintT explicitly for the SIMD path. // // Condition 3 (negative detection) uses > 0x80000000u (not >= ) so that -0.0f // (bit pattern 0x80000000) is excluded — IEEE 754 requires log(-0) = -inf, // which is already handled by condition 1 (x == 0.0f). if constexpr (std::is_same_v) { if (xi < 0x00800000 || xi >= 0x7f800000) { int orbits = bool_apply_or_zero(x == 0.0f, 0xff800000); orbits |= bool_apply_or_zero((xi & 0x7f800000) == 0x7f800000, xi); orbits |= bool_apply_or_zero(static_cast(xi) > 0x80000000u, 0x7f8fffff); y = orbits ? bit_cast(orbits) : y; } } else { IntT orbits = bool_apply_or_zero(x == 0.0f, 0xff800000); orbits |= bool_apply_or_zero((xi & 0x7f800000) == 0x7f800000, xi); orbits |= bool_apply_or_zero(bit_cast(xi) > UintT(0x80000000), 0x7f8fffff); // Use a proper mask (all-ones/all-zeros) for conditional, since blendv // only checks bit 31. Raw orbits values like 0x7F800000 (+inf) have bit 31 // clear and would be incorrectly treated as "false". auto hasBound = orbits != IntT(0); y = FloatTraits::conditional(hasBound, bit_cast(orbits), y); } return y; } template DISPENSO_INLINE Flt atan_poly(Flt x) { // atan(x) ≈ x * P(x), where P approximates atan(x)/x on [0, 1]. // Horner (not Estrin): Estrin regresses atan ULP from 3 to 4 and is slower // on scalar due to register pressure from the x-power tree. Flt y; if constexpr (AccuracyTraits::kMaxAccuracy || AccuracyTraits::kBoundsValues) { // Sollya fpminimax(atan(x)/x, 11, [|SG...|], [1b-50, 1]): // sup-norm error < 2^-31. y = dispenso::fast_math::hornerEval( x, -0x1.1592cap-6f, 0x1.a09c1cp-4f, -0x1.f68112p-3f, 0x1.150f26p-2f, -0x1.e2e56p-5f, -0x1.0e262ep-3f, -0x1.d8f3p-15f, 0x1.98c9aap-3f, 0x1.352e9p-14f, -0x1.5556b2p-2f, 0x1.f2bd6p-24f, 0x1p0f); } else { // Sollya fpminimax(atan(x)/x, 9, [|SG...|], [1b-50, 1]): // sup-norm error < 2^-26. y = dispenso::fast_math::hornerEval( x, 0x1.1f76cap-6f, -0x1.cc3324p-4f, 0x1.252662p-2f, -0x1.530ee2p-2f, 0x1.2ca298p-4f, 0x1.76789ap-3f, 0x1.29d66ep-9f, -0x1.557c3cp-2f, 0x1.c39d04p-19f, 0x1p0f); } return y * x; } } // namespace detail } // namespace fast_math } // namespace dispenso ================================================ FILE: dispenso/fast_math/fast_math.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * EXPERIMENTAL — This sublibrary is under active development. * The API is unstable and subject to breaking changes without notice. * Do not depend on it in production code. 
*/ #pragma once #include namespace dispenso { namespace fast_math { struct DefaultAccuracyTraits { // Check for out-of-range, NaN, Inf inputs and ensure outputs match std. Note that functions may // still provide correct values out of bounds, but if this is false, they are not required to. static constexpr bool kBoundsValues = false; // Try to get the lowest possible errors to match std. static constexpr bool kMaxAccuracy = false; }; struct MaxAccuracyTraits { // Check for out-of-range, NaN, Inf inputs and ensure outputs match std static constexpr bool kBoundsValues = true; // Try to get the lowest possible errors to match std. static constexpr bool kMaxAccuracy = true; }; } // namespace fast_math } // namespace dispenso #include "detail/double_promote.h" #include "detail/fast_math_impl.h" namespace dispenso { namespace fast_math { namespace detail { // AccuracyTraits adapter for internal pow use: always uses best polynomial, // inherits kBoundsValues from the outer traits. template struct PowInternalTraits { static constexpr bool kMaxAccuracy = true; static constexpr bool kBoundsValues = AccuracyTraits::kBoundsValues; }; // Scalar integer parity check on float bits. // Returns 0 if y is not an integer, 1 if even integer, 2 if odd integer. // Works regardless of sign bit (sign is masked off from exponent field). // Branching version — used inside the rare negative-x path. DISPENSO_INLINE int32_t checkint(uint32_t iy) { int32_t e = static_cast(iy >> 23) & 0xff; if (e < 127) return 0; // |y| < 1 → not integer (includes 0, subnormals) if (e > 127 + 23) return 1; // |y| >= 2^24 → ULP ≥ 2, only even integers representable // 127 <= e <= 150: check mantissa bits below the integer point. uint32_t shift = static_cast(127 + 23 - e); if (iy & ((1u << shift) - 1u)) return 0; // fractional mantissa bits → not integer if (iy & (1u << shift)) return 2; // lowest integer bit set → odd return 1; // even integer } // Branchless integer parity check for the scalar non-bounds path. // Returns 0 if not integer, 1 if even integer, 2 if odd integer. // For the non-bounds default path, we want to avoid branches on the hot path. // The negative-x case is rare, but mixed-sign workloads benefit from branchless. DISPENSO_INLINE int32_t checkint_branchless(uint32_t iy) { int32_t e = static_cast(iy >> 23) & 0xff; // Clamp shift to [0, 31] to avoid UB from shifting >= 32. When e < 127 // (|y| < 1, not integer), the clamped shift reads a garbage bit, but // is_int below is false so the garbage is never used. uint32_t shift = static_cast(127 + 23 - e) & 31u; uint32_t has_frac = iy & ((1u << shift) - 1u); uint32_t odd_bit = (iy >> shift) & 1u; // is_int: e >= 127 and no fractional bits (or e > 150, always integer). // For e > 150: shift would be negative (clamped to garbage), but we force result = 1. int32_t is_int = (e >= 127) & (has_frac == 0); int32_t is_large = (e > 127 + 23); // always even // Result: 0 if not int, 1 if even int, 2 if odd int. // odd_bit is meaningful only when is_int && !is_large. return is_int + (is_int & static_cast(odd_bit) & ~is_large); } // Scalar double-precision pow core: exp2(y * log2(|x|)) entirely in double. // Float inputs/output, double internal arithmetic. // // log2: 16-entry table of {1/c, log2(c)} eliminates division. Degree-4 // polynomial (Sollya fpminimax) approximates log2(1+r)/r with pipelined // evaluation. Table centers at OFF + i*2^19 + 2^18, max |r| < 0.0297. // exp2: 32-entry table of 2^(k/32) stored as uint64 with pre-subtracted // exponent bits. 
Degree-3 polynomial (Sollya fpminimax) on [-1/64, 1/64]. // // Total: ~8 double muls + ~5 double adds in two short parallel chains. // Achieves <1 ULP for float output. // log2 table: 16 entries, each {1/c, log2(c)} where c is the center of // the ith subinterval of [OFF, 2*OFF] (OFF = 0x3f330000). struct PowLog2Entry { double invc; double logc; }; // EXP2F_TABLE_BITS = 5 → 32 entries. // tab[i] = uint64(2^(i/32)) - (i << (52-5)). // To reconstruct 2^(k/32): double(tab[k%32] + (k << 47)). constexpr uint64_t kPowExp2Tab[32] = { 0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51, 0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1, 0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d, 0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585, 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13, 0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d, 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069, 0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540, }; constexpr PowLog2Entry kPowLog2Tab[16] = { {0x1.661ec6a5122f9p+0, -0x1.efec61b011f85p-2}, // i=0, c=0.71484375 {0x1.571ed3c506b3ap+0, -0x1.b0b67f4f46810p-2}, // i=1, c=0.74609375 {0x1.49539e3b2d067p+0, -0x1.7418acebbf18fp-2}, // i=2, c=0.77734375 {0x1.3c995a47babe7p+0, -0x1.39de8e1559f6fp-2}, // i=3, c=0.80859375 {0x1.30d190130d190p+0, -0x1.01d9bbcfa61d4p-2}, // i=4, c=0.83984375 {0x1.25e22708092f1p+0, -0x1.97c1cb13c7ec1p-3}, // i=5, c=0.87109375 {0x1.1bb4a4046ed29p+0, -0x1.2f9e32d5bfdd1p-3}, // i=6, c=0.90234375 {0x1.12358e75d3033p+0, -0x1.960caf9abb7cap-4}, // i=7, c=0.93359375 {0x1.0953f39010954p+0, -0x1.a6f9c377dd31bp-5}, // i=8, c=0.96484375 {0x1p+0, 0x0p+0}, // i=9, c=1.0 (exact) {0x1.e573ac901e574p-1, 0x1.3aa2fdd27f1c3p-4}, // i=10, c=1.05468750 {0x1.ca4b3055ee191p-1, 0x1.476a9f983f74dp-3}, // i=11, c=1.11718750 {0x1.b2036406c80d9p-1, 0x1.e840be74e6a4dp-3}, // i=12, c=1.17968750 {0x1.9c2d14ee4a102p-1, 0x1.406463b1b0449p-2}, // i=13, c=1.24218750 {0x1.886e5f0abb04ap-1, 0x1.88e9c72e0b226p-2}, // i=14, c=1.30468750 {0x1.767dce434a9b1p-1, 0x1.ce0a4923a587dp-2}, // i=15, c=1.36718750 }; // log2(1+r)/r polynomial coefficients (degree 4, Sollya fpminimax). // Relative error < 2^-32 on |r| < 0.0297. constexpr double kPowLog2Poly[5] = { 0x1.27bc533354f16p-2, // P[0]: r^4 coeff -0x1.719a02b5d4c08p-2, // P[1]: r^3 coeff 0x1.ec70982733e8fp-2, // P[2]: r^2 coeff -0x1.7154745c0af07p-1, // P[3]: r coeff (approx -1/(2*ln2)) 0x1.71547652bc3ep0, // P[4]: r^0 coeff (approx 1/ln2) }; // exp2 polynomial coefficients (degree 3 on [-1/64, 1/64], Sollya fpminimax). // Relative error < 2^-27 on (2^r - 1)/r. 
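// Illustrative sketch (not part of the library): reconstructing the exp2
// scale factor from kPowExp2Tab above. Each entry stores uint64(2^(i/32))
// with (i << 47) already subtracted, so adding the rounded index k back and
// bit-casting yields 2^(k/32), with the integer part of k/32 landing in the
// exponent field:
//
//   uint64_t t = kPowExp2Tab[k & 31] + (uint64_t(k) << (52 - 5));
//   double   s = bit_cast<double>(t);   // s == 2^(k/32)
//
// e.g. k = 32: tab[0] = 0x3ff0000000000000 (1.0), plus (32 << 47) = 2^52,
// gives 0x4000000000000000, i.e. s = 2.0 = 2^(32/32).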
constexpr double kPowExp2Poly[3] = { 0x1.c6b08d7047f43p-5, // C[0]: r^2 coeff 0x1.ebfccc582d012p-3, // C[1]: r coeff 0x1.62e42fefe5286p-1, // C[2]: r^0 coeff (approx ln2) }; DISPENSO_INLINE float pow_double_core(float ax, float y) { constexpr uint32_t kOff = 0x3f330000; // --- log2 --- uint32_t ix = bit_cast(ax); uint32_t tmp = ix - kOff; int32_t i = (tmp >> (23 - 4)) & 15; // table index uint32_t top = tmp & 0xff800000u; uint32_t iz = ix - top; int32_t k = static_cast(top) >> 23; // exponent (arithmetic shift) double invc = kPowLog2Tab[i].invc; double logc = kPowLog2Tab[i].logc; double z = static_cast(bit_cast(iz)); // log2(x) = log1p(z/c - 1)/ln2 + log2(c) + k double r = z * invc - 1.0; double y0 = logc + static_cast(k); // Pipelined polynomial: two parallel chains merged via r4. double r2 = r * r; double p0 = kPowLog2Poly[0] * r + kPowLog2Poly[1]; double p1 = kPowLog2Poly[2] * r + kPowLog2Poly[3]; double r4 = r2 * r2; double p2 = kPowLog2Poly[4] * r + y0; p2 = p1 * r2 + p2; double logx = p0 * r4 + p2; // --- y * log2(x) --- double ylogx = static_cast(y) * logx; // Overflow/underflow check. // Check if |ylogx| >= 126 using exponent bits. uint64_t ylogx_bits = bit_cast(ylogx); if ((ylogx_bits >> 47 & 0xffff) >= (bit_cast(126.0) >> 47)) { if (ylogx > 0x1.fffffffd1d571p+6) { return std::numeric_limits::infinity(); } if (ylogx <= -150.0) { return 0.0f; } } // --- exp2 --- // x = k/N + r with r in [-1/(2N), 1/(2N)], N=32. constexpr double kShift = 0x1.8p+52 / 32.0; // magic for round-to-nearest double kd = ylogx + kShift; uint64_t ki = bit_cast(kd); kd -= kShift; // k/N, rounded double rd = ylogx - kd; // exp2(x) = 2^(k/N) * (C0*r^3 + C1*r^2 + C2*r + 1) // = s * poly(r) uint64_t t = kPowExp2Tab[ki & 31]; t += ki << (52 - 5); // add exponent double s = bit_cast(t); double r2e = rd * rd; double pe = kPowExp2Poly[0] * rd + kPowExp2Poly[1]; double qe = kPowExp2Poly[2] * rd + 1.0; qe = pe * r2e + qe; return static_cast(qe * s); } // Hybrid double-precision pow core: table-assisted log2 + tableless exp2. // Float inputs/output, double internal arithmetic via DoubleVec. // // log2: SIMD range reduction (float/uint32) extracts a 4-bit table index per // lane. Scalar lookups fetch {1/c, log2(c)} from the 16-entry table, packed // into DoubleVec. The table narrows |r| to <0.03, enabling a degree-4 // polynomial (vs degree-10 tableless). 8 scalar loads total (4 lanes × 2 // values), always L1-hot (256 bytes). // exp2: tableless degree-5 polynomial via exp2_split (same as poly core). // // Works for both scalar (Flt=float) and SIMD (Flt=SseFloat, etc.). template DISPENSO_INLINE Flt pow_double_hybrid_core(Flt ax, Flt y) { using DV = DoubleVec; using IntT = IntType_t; using UintT = UintType_t; // --- Range reduction (SIMD, same as pow_double_core) --- // Note: does NOT handle subnormal ax (ix < 0x00800000). Callers must // catch subnormals in is_special and fall back to pow_double_poly_core. constexpr uint32_t kOff = 0x3f330000; UintT ix = bit_cast(ax); UintT tmp = ix - UintT(kOff); IntT i = (bit_cast(tmp) >> 19) & IntT(15); // 4-bit table index UintT top = tmp & UintT(0xff800000u); UintT iz = ix - top; IntT k = bit_cast(top) >> 23; // exponent (arithmetic shift) // --- Table lookups: 4 lanes × {invc, logc} = 8 scalar loads --- // Table is 16 × {double, double} = 256 bytes, stride 2 doubles per entry. 
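// Illustrative worked example (not part of the library) of the range
// reduction shared with pow_double_core above, showing how the gathered
// table entry is selected. For x = 3.0f (ix = 0x40400000):
//
//   tmp = ix - kOff          = 0x010D0000
//   i   = (tmp >> 19) & 15   = 1            → c = 0.74609375
//   top = tmp & 0xff800000   = 0x01000000   → k = top >> 23 = 2
//   iz  = ix - top           = 0x3F400000   → z = 0.75
//
// so r = z/c - 1 ≈ 0.00524 and
// log2(3) = k + log2(c) + log2(1 + r) ≈ 2 - 0.4226 + 0.0075 ≈ 1.585;
// every lane with x = 3.0f gathers invc/logc from kPowLog2Tab[1].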
const double* tab = reinterpret_cast(kPowLog2Tab); IntT i2 = i + i; // stride: 2 doubles per entry DV invc = DV::gather(tab, i2); // tab[2*i + 0] = invc DV logc = DV::gather(tab, i2 + IntT(1)); // tab[2*i + 1] = logc // --- log2 in double --- DV z = DV::from_float(bit_cast(iz)); DV r = z * invc - DV(1.0); DV y0 = logc + DV::from_float(Flt(k)); // Degree-4 Horner for log2 (chain too short for Estrin benefit). // log2(x) = y0 + r * P(r), |r| < 0.03. DV p(kPowLog2Poly[0]); p = fma(p, r, DV(kPowLog2Poly[1])); p = fma(p, r, DV(kPowLog2Poly[2])); p = fma(p, r, DV(kPowLog2Poly[3])); p = fma(p, r, DV(kPowLog2Poly[4])); DV logx = fma(p, r, y0); // --- y * log2(x) --- DV ylogx = DV::from_float(y) * logx; // Clamp to valid double exponent range. ylogx = clamp(ylogx, DV(-1022.0), DV(1023.0)); // --- Tableless exp2 in double --- auto [re, scale] = exp2_split(ylogx); // Degree-5 Horner for exp2 (chain too short for Estrin benefit). DV q(0x1.4308fabc18decp-13); q = fma(q, re, DV(0x1.5f07b4611e454p-10)); q = fma(q, re, DV(0x1.3b2b9fbd8970fp-7)); q = fma(q, re, DV(0x1.c6af6e92be375p-5)); q = fma(q, re, DV(0x1.ebfbdec29c82ap-3)); q = fma(q, re, DV(0x1.62e4302eeb44dp-1)); DV result = fma(scale, re * q, scale); return result.to_float(); } // Tableless double-precision pow core: exp2(y * log2(|x|)) entirely in double. // Float inputs/output, double internal arithmetic via DoubleVec. // // log2: Uses logarithmSep range reduction (float/uint32) to get exponent i and // mantissa m in [sqrt(0.5), sqrt(2)]. Then t = (double)m - 1.0 (exact in // double) and log2(1+t) is evaluated as t * P(t) where P is a degree-10 // Sollya fpminimax polynomial (~30 bits). // No tables, no division, no error tracking. // exp2: Tableless double-precision polynomial. Range reduce to integer n and // fractional r ∈ [-0.5, 0.5], then 2^x = 2^n * (1 + r*Q(r)) where Q is a // degree-5 Sollya fpminimax polynomial (~27 bits). No tables. // // Works for both scalar (Flt=float) and SIMD (Flt=SseFloat, etc.). template DISPENSO_INLINE Flt pow_double_poly_core(Flt ax, Flt y) { using DV = DoubleVec; // --- Range reduction (float/uint32) --- // logarithmSep gives us exponent i and mantissa m in [sqrt(0.5), sqrt(2)]. auto [xi, i_f, m] = logarithmSep(ax); // --- Widen to double BEFORE subtraction --- // m - 1 in float loses ~17 bits when m ≈ 1 (catastrophic cancellation). // In double, (double)m - 1.0 is exact since every float is exactly // representable in double, and the result fits in 24 mantissa bits. DV t = DV::from_float(m) - DV(1.0); DV i_d = DV::from_float(i_f); // --- Double polynomial: log2(1+t) = t * P(t), degree 10 (Estrin) --- // Sollya fpminimax on [-0.293, 0.414], double precision. // Sup-norm error: 1.10e-9 (~30 bits). // Estrin's scheme evaluates independent sub-polynomials in parallel, // reducing latency vs Horner by exploiting ILP (multiple independent FMAs // at each level feed both FMA ports simultaneously). // P(t) = (c0+c1*t) + t²*(c2+c3*t) + t⁴*(c4+c5*t) + t⁶*(c6+c7*t) // + t⁸*(c8+c9*t+c10*t²) // // Level 0: t², and five independent coefficient pairs. DV t2 = t * t; DV p01 = fma(DV(-0x1.7154762acdfffp-1), t, DV(0x1.71547656a2495p+0)); DV p23 = fma(DV(-0x1.71548ef022a84p-2), t, DV(0x1.ec707f060e35ep-2)); DV p45 = fma(DV(-0x1.ec7a94d1207adp-3), t, DV(0x1.278083c622fbcp-2)); DV p67 = fma(DV(-0x1.6e5f6f8760a8ap-3), t, DV(0x1.a3ee210f50f0fp-3)); DV p89 = fma(DV(-0x1.5a6abc244ec79p-3), t, DV(0x1.60552656955f2p-3)); // Level 1: t⁴, combine pairs with t². 
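// Illustrative latency sketch for the Estrin levels used here (not part of
// the library). A degree-10 Horner chain is ~10 dependent FMAs; the level
// structure keeps only one FMA per level on the critical path:
//
//   level 0: t2 and the five coefficient pairs (independent FMAs)
//   level 1: t4, q03, q47, q8A   (each depends only on level 0)
//   level 2: t8, r07             (each depends only on levels 0-1)
//   level 3: p = fma(q8A, t8, r07)
//
// so the dependent chain is ~5 FMAs, roughly halving latency when the
// hardware can issue the independent FMAs in parallel.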
DV t4 = t2 * t2; DV q03 = fma(p23, t2, p01); DV q47 = fma(p67, t2, p45); DV q8A = fma(DV(0x1.8e0e8c535162ap-4), t2, p89); // c10*t² + (c8+c9*t) // Level 2: t⁸, combine quads with t⁴. DV t8 = t4 * t4; DV r07 = fma(q47, t4, q03); // Level 3: final combination. DV p = fma(q8A, t8, r07); // log2(1+t) = t * P(t); log2(x) = i + log2(m) DV log2x = i_d + t * p; // --- y * log2(x) --- DV ylogx = DV::from_float(y) * log2x; // Clamp to valid double exponent range [-1022, 1023]. // This prevents invalid bit patterns in exp2_split while preserving // correct float narrowing: 2^(-1022) → 0.0f, 2^1023 → inf. ylogx = clamp(ylogx, DV(-1022.0), DV(1023.0)); // --- Tableless exp2 in double --- auto [r, scale] = exp2_split(ylogx); // 2^r = 1 + r * Q(r), degree-5 Sollya fpminimax on [-0.5, 0.5]. // Q(r) ≈ (2^r - 1)/r. Sup-norm error: 8.88e-9 (~27 bits). DV q(0x1.4308fabc18decp-13); // e5 q = fma(q, r, DV(0x1.5f07b4611e454p-10)); // e4 q = fma(q, r, DV(0x1.3b2b9fbd8970fp-7)); // e3 q = fma(q, r, DV(0x1.c6af6e92be375p-5)); // e2 q = fma(q, r, DV(0x1.ebfbdec29c82ap-3)); // e1 q = fma(q, r, DV(0x1.62e4302eeb44dp-1)); // e0 // 2^ylogx = scale * (1 + r * q) = scale + scale * r * q DV result = fma(scale, r * q, scale); return result.to_float(); } // --- Scalar pow IEEE 754 special-case helpers --- // Handle y = 0, inf, or NaN for scalar pow bounds path. // Returns true if the result is determined (stored in *out). // // Bit tricks used throughout: // 2u * val — left-shifts by 1, discarding the sign bit. This maps // ±0 to 0, ±inf to 0xFF000000, and NaN to > 0xFF000000. // 2u * val - 1u — maps ±0 to UINT_MAX (wraps), normal floats to a // positive range, and inf/NaN to >= 0xFF000000 - 1. // So "2u*val - 1u >= 2u*0x7f800000u - 1u" tests whether // val is zero, inf, or NaN in one branch-free comparison. // ix & 0x7FFFFFFFu — clears the sign bit, giving |x| as raw bits. DISPENSO_INLINE bool pow_scalar_y_special(float x, float y, uint32_t ix, uint32_t iy, float* out) { if (2u * iy == 0u) { *out = 1.0f; return true; // pow(x, ±0) = 1 } if (ix == 0x3f800000u) { *out = 1.0f; return true; // pow(1, y) = 1 even for NaN y } // Either x or y is NaN → propagate via x + y (IEEE 754 NaN arithmetic). if (2u * (ix & 0x7fffffffu) > 2u * 0x7f800000u || 2u * iy > 2u * 0x7f800000u) { *out = x + y; return true; } // |x| == 1 (either +1 or -1) and y is ±inf → result is 1. if (2u * (ix & 0x7fffffffu) == 2u * 0x3f800000u) { *out = 1.0f; return true; } // y is ±inf (NaN cases handled above). Result is 0 or +inf depending on // whether |x| < 1 and the sign of y: // |x| < 1, y = +inf → 0 |x| > 1, y = +inf → +inf // |x| < 1, y = -inf → +inf |x| > 1, y = -inf → 0 if ((2u * (ix & 0x7fffffffu) < 2u * 0x3f800000u) == !(iy & 0x80000000u)) { *out = 0.0f; return true; } *out = y * y; // +inf (y*y overflows to +inf for finite y, identity for ±inf) return true; } // Handle negative/zero/inf/subnormal x for scalar pow bounds path. // Called when x is outside the normal positive range (the fast-path condition // in the caller already filtered normal positive x). Returns true if the // result is determined (stored in *out). Otherwise adjusts ix (to |x|, with // subnormals normalized) and sign_bias for the core computation. DISPENSO_INLINE bool pow_scalar_x_special(uint32_t& ix, uint32_t iy, uint32_t& sign_bias, float* out) { if (ix & 0x80000000u) { // Negative x: check integer parity of y to determine sign of result. // checkint returns: 0 = not integer, 1 = even integer, 2 = odd integer. 
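// Illustrative traces of checkint (not part of the library), matching the
// three return values described above:
//
//   y = 3.0f → iy = 0x40400000, e = 128, shift = 22:
//              no bits below bit 22, bit 22 set     → 2 (odd integer)
//   y = 4.0f → iy = 0x40800000, e = 129, shift = 21:
//              no bits below bit 21, bit 21 clear   → 1 (even integer)
//   y = 2.5f → iy = 0x40200000, e = 128, shift = 22:
//              bit 21 set below the integer point   → 0 (not integer)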
int32_t yint = checkint(iy); if (yint == 2) sign_bias = 0x80000000u; // odd integer y → negate final result ix &= 0x7fffffffu; // work with |x| from here if (ix == 0 || ix == 0x7f800000u) { // -0 or -inf: |x|^2 maps 0→0 and inf→inf. Then apply sign and // reciprocal (negative y inverts: pow(-0, -3) = -inf). float x2 = bit_cast(ix) * bit_cast(ix); if (iy & 0x80000000u) x2 = 1.0f / x2; *out = bit_cast(bit_cast(x2) ^ sign_bias); return true; } if (yint == 0) { *out = std::numeric_limits::quiet_NaN(); // neg ^ non-integer = NaN return true; } // Negative finite nonzero x with integer y: fall through to core with |x|. } // Test for +0 or +inf. "2u*ix - 1u >= 2u*0x7f800000u - 1u" is true when // ix is 0 (wraps to UINT_MAX) or >= 0x7f800000 (inf/NaN, but NaN was // already handled by the caller's y-special path). if (2u * ix - 1u >= 2u * 0x7f800000u - 1u) { float x2 = bit_cast(ix) * bit_cast(ix); if (iy & 0x80000000u) x2 = 1.0f / x2; *out = x2; return true; } if (ix < 0x00800000u) { // Subnormal x: scale by 2^23 to normalize, then subtract 23 from the // biased exponent so the log2 core sees the correct value. ix = bit_cast(bit_cast(ix) * 0x1p23f); ix -= 23u << 23; } return false; // Fall through to core computation with adjusted ix. } // SIMD pow fixup for special inputs (zero, subnormal, inf, NaN). // The double-precision cores produce garbage for these because logarithmSep's // range reduction (exponent extraction via integer subtract) is undefined for // zero/inf/NaN bit patterns. This cold-path fixup corrects only the affected // lanes; the branch is almost never taken for normal workloads. template DISPENSO_INLINE void pow_simd_special_fixup(Flt ax, Flt y, Flt& r) { using IntT = IntType_t; using UintT = UintType_t; IntT axi = bit_cast(ax); // Detect special lanes: subnormal (exponent field 0, mantissa nonzero), // zero (all bits 0), inf (0x7F800000), NaN (> 0x7F800000). // nonnormal() checks exponent == 0xFF (inf/NaN). Combined with < 0x00800000 // (subnormal/zero), this covers all non-normal inputs. auto is_special = (axi < IntT(0x00800000)) | nonnormal(axi); if (DISPENSO_EXPECT(any_true(is_special), 0)) { // Subnormals: the hybrid core's range reduction (ix - kOff) can't handle // exponent-0 bit patterns. Recompute these lanes via poly core, whose // logarithmSep prescales subnormals by 2^23 before extraction. auto is_subnorm = (axi > IntT(0)) & (axi < IntT(0x00800000)); if (any_true(is_subnorm)) { Flt r_sub = pow_double_poly_core(ax, y); r = FloatTraits::conditional(is_subnorm, r_sub, r); } // Zero/inf/NaN lanes: use an integer trick that swaps 0↔inf in one op. // IEEE 754 bit layout: 0 = 0x00000000, +inf = 0x7F800000. // 0x7F800000 - 0x00000000 = 0x7F800000 (+inf) → pow(0, pos) = 0, pow(0, neg) = inf // 0x7F800000 - 0x7F800000 = 0x00000000 (0) → pow(inf, pos) = inf, pow(inf, neg) = 0 // 0x7F800000 - (NaN bits) wraps negative, which reinterprets as NaN // (sign bit irrelevant for NaN). So: // y > 0 → result = ax (0 stays 0, inf stays inf) // y < 0 → result = flipped (0↔inf swap) // NaN → stays NaN either way auto is_zinf = (axi == IntT(0)) | nonnormal(axi); UintT ax_bits = bit_cast(ax); Flt flipped = bit_cast(UintT(0x7F800000u) - ax_bits); Flt special_r = FloatTraits::conditional(y < 0.0f, flipped, ax); r = FloatTraits::conditional(is_zinf, special_r, r); } } // Scalar-exponent fast paths: exact results for common y values and // binary squaring for small integer exponents. Sets y_is_int and y_is_odd // as output for the caller's sign-handling path when returning false. 
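// Illustrative trace of the binary-squaring loop used below for small
// integer exponents (not part of the library). For y = 13 (binary 1101),
// starting from result = 1, base = |x|:
//
//   n = 13: bit set   → result = x         ; base = x^2
//   n = 6 : bit clear →                      base = x^4
//   n = 3 : bit set   → result = x * x^4   ; base = x^8
//   n = 1 : bit set   → result = x^5 * x^8 = x^13
//
// i.e. roughly 2*log2(y) multiplies instead of y - 1, and every multiply is
// a uniform SIMD op, so the same code path serves all lanes.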
template DISPENSO_INLINE bool pow_scalar_y_fast_path(Flt x, float y, bool& y_is_int, bool& y_is_odd, Flt& out) { using UintT = UintType_t; if (y == 0.0f) { out = Flt(1.0f); return true; } if (y == 1.0f) { out = x; return true; } if (y == -1.0f) { out = Flt(1.0f) / x; return true; } if (y == 0.5f) { constexpr float kNaN = std::numeric_limits::quiet_NaN(); auto negx = x < 0.0f; out = FloatTraits::conditional(negx, Flt(kNaN), FloatTraits::sqrt(x)); return true; } if (y == 2.0f) { out = x * x; return true; } float ay = std::fabs(y); y_is_int = (std::floor(y) == y); y_is_odd = y_is_int && (std::fmod(ay, 2.0f) == 1.0f); if (y_is_int && ay < 64.0f) { // Binary squaring with uniform SIMD multiplies. int32_t n = static_cast(ay); Flt result(1.0f); Flt base = fabs(x); while (n > 0) { if (n & 1) result = result * base; base = base * base; n >>= 1; } if (y < 0.0f) result = Flt(1.0f) / result; if (y_is_odd) { UintT xsign = bit_cast(x) & UintT(0x80000000u); result = bit_cast(bit_cast(result) ^ xsign); } out = result; return true; } return false; } } // namespace detail // Compile-time check that Flt is a supported type: float, a SIMD float wrapper, // or a raw intrinsic type that maps to one via SimdTypeFor. template constexpr void assert_float_type() { // For raw intrinsics (__m128 etc.), SimdType_t maps to the wrapper (SseFloat etc.). // For wrappers and float, SimdType_t is identity. Check that FloatTraits exists // by verifying the kOne constant is present (only defined for float-based types). static_assert( FloatTraits>::kOne == 0x3f800000, "fast_math only supports float and float-based SIMD types (SseFloat, AvxFloat, etc.)"); } /** * @brief Hardware-delegated square root. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Accepted but ignored; result is always 0 ULP. * @param x Input value. * @return Square root of @p x. Compatible with all SIMD backends. */ template DISPENSO_INLINE Flt sqrt(Flt x) { assert_float_type(); if constexpr (!std::is_same_v>) { return FloatTraits>::sqrt(SimdType_t(x)).v; } else { return std::sqrt(x); } } /** * @brief Cube root approximation. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Default: 12 ULP, MaxAccuracy: 3 ULP. * kBoundsValues: returns input for inf/NaN/zero. * @param x Input value (all float domain, including denormals). * @return Cube root of @p x. Compatible with all SIMD backends. */ template DISPENSO_INLINE Flt cbrt(Flt x) { assert_float_type(); if constexpr (!std::is_same_v>) { // Raw intrinsic type (__m128, Vec512, etc.) — forward to wrapper. return cbrt, AccuracyTraits>(SimdType_t(x)).v; } else if constexpr (std::is_same_v) { // Scalar path: branchless for the normal case. // Denormals get prescale=2^24, postscale=1/256 (cmov). // Zero is zeroed via AND mask (no blend). uint32_t ui = bit_cast(x); uint32_t usgn = ui & 0x80000000u; uint32_t uabs = ui & 0x7fffffffu; if constexpr (AccuracyTraits::kBoundsValues) { if (DISPENSO_EXPECT((uabs & 0x7f800000u) == 0x7f800000u, 0)) { return x; // inf/NaN } if (DISPENSO_EXPECT(uabs == 0u, 0)) { return x; // ±0 (preserves -0) } } // expZero covers both zero and denormal; zero is handled by AND mask below. bool expZero = (uabs & 0x7f800000u) == 0u; float prescale = expZero ? 16777216.0f : 1.0f; float postscale = expZero ? 
(1.0f / 256.0f) : 1.0f; float absX = bit_cast(uabs) * prescale; int32_t iScaled = static_cast(bit_cast(absX)); float sgnx = bit_cast(usgn | 0x3f800000u); float result = sgnx / detail::rcp_cbrt(absX, iScaled); result *= postscale; // Zero mask: AND zeros out result for zero input, OR sign bit back in // so that cbrt(-0) = -0. uint32_t zeroMask = 0u - static_cast(uabs != 0u); return bit_cast((bit_cast(result) & zeroMask) | usgn); } else { // SIMD wrapper path: prescale/postscale multipliers + AND mask for zero. using IntT = IntType_t; IntT i = bit_cast(x); IntT isgn = i & 0x80000000; i &= 0x7fffffff; // expZero covers both zero and denormal. auto expZero = (i & 0x7f800000) == 0; Flt prescale = FloatTraits::conditional(expZero, Flt(16777216.0f), Flt(1.0f)); Flt postscale = FloatTraits::conditional(expZero, Flt(1.0f / 256.0f), Flt(1.0f)); Flt absX = bit_cast(i) * prescale; IntT iScaled = bit_cast(absX); Flt sgnx = bit_cast(isgn | FloatTraits::kOne); Flt result = sgnx / detail::rcp_cbrt(absX, iScaled); result = result * postscale; // Zero: AND mask zeroes result, OR sign bit back so cbrt(-0) = -0. auto zeroMask = bool_as_mask(i != 0); result = bit_cast((bit_cast(result) & zeroMask) | isgn); if constexpr (AccuracyTraits::kBoundsValues) { return FloatTraits::conditional(nonnormal(iScaled), x, result); } return result; } } /** * @brief Decompose a float into mantissa and exponent (bit-accurate for normals). * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Accepted but ignored; result is always bit-accurate. * @param x Input value. * @param eptr Pointer to receive the exponent. * @return Mantissa in [0.5, 1) for normal values. Returns @p x unchanged for * inf/NaN/zero/subnormal. Subnormals are passed through with *eptr = 0 * rather than normalized into [0.5, 1) as std::frexp does. This means the * mantissa contract is violated for subnormals, but ldexp(frexp(x)) == x * still holds (the round-trip is an identity). Code that depends on a * normalized mantissa (e.g., counting significant bits) should not use * subnormal inputs. */ template DISPENSO_INLINE Flt frexp(Flt x, IntType_t* eptr) { assert_float_type(); if constexpr (!std::is_same_v>) { return frexp>(SimdType_t(x), eptr).v; } else { using IntT = IntType_t; IntT ix = bit_cast>(x); *eptr = 0; return FloatTraits::conditional( nonnormalOrZero(ix), x, detail::frexpImpl(ix, eptr)); } } /** * @brief Multiply a float by a power of 2 (bit-accurate). * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Accepted but ignored; result is always bit-accurate. * @param x Input value. * @param e Exponent to apply (x * 2^e). * @return @p x * 2^e. Returns @p x unchanged for inf/NaN. * Compatible with all SIMD backends. */ template DISPENSO_INLINE Flt ldexp(Flt x, IntType_t e) { assert_float_type(); if constexpr (!std::is_same_v>) { return ldexp>(SimdType_t(x), e).v; } else { using IntT = IntType_t; IntT hx = bit_cast(x); return FloatTraits::conditional(nonnormal(hx), x, detail::ldexpImpl(hx, e)); } } /** * @brief Arc cosine approximation. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Default: 3 ULP. kMaxAccuracy: 2 ULP (degree-8 polynomial). * @param x Input value in [-1, 1]. * @return Arc cosine of @p x in radians. Compatible with all SIMD backends. 
*/ template DISPENSO_INLINE Flt acos(Flt x) { assert_float_type(); if constexpr (!std::is_same_v>) { return acos, AccuracyTraits>(SimdType_t(x)).v; } else { BoolType_t xneg = x < 0.0f; // abs UintType_t xi = bit_cast>(x); xi &= 0x7fffffff; x = bit_cast(xi); // acos(x) = sqrt(1-x) * P(x), where P approximates acos(x)/sqrt(1-x) on [0, 1). // Horner (not Estrin): Estrin regresses acos ULP from 3 to 4. Flt y; if constexpr (AccuracyTraits::kMaxAccuracy || AccuracyTraits::kBoundsValues) { // Sollya fpminimax(acos(x)/sqrt(1-x), 8, [|SG...|], [0, 1-1b-23]): // sup-norm error < 2^-25. y = dispenso::fast_math::hornerEval( x, 0x1.d129f2p-11f, -0x1.3ca11ap-8f, 0x1.9a3376p-7f, -0x1.6a1a08p-6f, 0x1.10b838p-5f, -0x1.a0365ep-5f, 0x1.6cce04p-4f, -0x1.b7820cp-3f, 0x1.921fb6p0f); } else { // Sollya fpminimax(acos(x)/sqrt(1-x), 7, [|SG...|], [0, 1-1b-23]): // sup-norm error < 2^-24. y = dispenso::fast_math::hornerEval( x, -0x1.39a988p-10f, 0x1.a50fdp-8f, -0x1.120206p-6f, 0x1.f5a1e2p-6f, -0x1.9a1efp-5f, 0x1.6c5d48p-4f, -0x1.b77e7ep-3f, 0x1.921fb4p0f); } auto sqrt1mx = FloatTraits::sqrt(1.0f - x); return FloatTraits::conditional( xneg, FloatTraits::fma(y, -sqrt1mx, Flt(kPi)), y * sqrt1mx); } } /** * @brief Arc sine approximation. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Default: 2 ULP. kMaxAccuracy: 1 ULP (higher-degree polynomials). * @param x Input value in [-1, 1]. * @return Arc sine of @p x in radians. Compatible with all SIMD backends. */ template DISPENSO_INLINE Flt asin(Flt x) { assert_float_type(); if constexpr (!std::is_same_v>) { return asin, AccuracyTraits>(SimdType_t(x)).v; } else { using IntT = IntType_t; IntT xi = bit_cast(x); IntT sgnbit = xi & 0x80000000; xi &= 0x7fffffff; x = bit_cast(xi); Flt ret; if constexpr (std::is_same_v) { if (xi > 0x3f000000) { // x > 0.5 ret = detail::asin_pt5_1(x); } else { ret = detail::asin_0_pt5(x); } } else { // Use compensating sum for better rounding. auto y = detail::asin_pt5_1(x); auto z = detail::asin_0_pt5(x); ret = FloatTraits::conditional(xi > 0x3f000000, y, z); // choose between correct estimate } return bit_cast(sgnbit | bit_cast(ret)); } } /** * @brief Sine approximation. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits kMaxAccuracy uses higher-precision range reduction. * Default: 2 ULP in [-2^20 pi, 2^20 pi]; accuracy degrades for larger |x|. * @param x Input value in radians (all float domain). * @return Sine of @p x. Compatible with all SIMD backends. * * Scalar: pi/4 reduction with quadrant branching (lowest latency). * SIMD: pi reduction, single sin polynomial, sign flip (no blending, ~30-40% faster). */ template DISPENSO_INLINE Flt sin(Flt x) { assert_float_type(); if constexpr (!std::is_same_v>) { return sin, AccuracyTraits>(SimdType_t(x)).v; } else if constexpr (std::is_same_v) { // Scalar: pi/4 reduction + branch selects sin or cos polynomial per quadrant. constexpr float kPi_2hi = 1.57079625f; constexpr float kPi_2med = 7.54978942e-08f; constexpr float kPi_2lo = 5.39032794e-15f; constexpr float k2_pi = 0.636619747f; Flt j = detail::rangeReduce2(x, k2_pi, kPi_2hi, kPi_2med, kPi_2lo); return detail::sincos_pi_impl(x, j, 0); } else { // SIMD: pi reduction — single sin_pi_2 polynomial, sign flip, no blending. constexpr float kPi_hi = 3.1415925f; constexpr float kPi_med = 1.50995788e-07f; constexpr float kPi_lo = 1.07806559e-14f; constexpr float k1_pi = 0.318309886f; Flt j = detail::rangeReduce2(x, k1_pi, kPi_hi, kPi_med, kPi_lo); return detail::sin_pi_reduction(x, j); } } /** * @brief Cosine approximation. 
* @tparam Flt float or SIMD float type. * @tparam AccuracyTraits kMaxAccuracy uses 4-part Cody-Waite range reduction. * Default: 2 ULP in [-2^15 pi, 2^15 pi]. * MaxAccuracy: 2 ULP in [-2^20 pi, 2^20 pi]. * @param x Input value in radians (all float domain). * @return Cosine of @p x. Compatible with all SIMD backends. * * Scalar: pi/4 reduction with quadrant branching (lowest latency). * SIMD: offset-pi reduction (q = 2*trunc(|x|/pi)+1), single sin polynomial, * sign flip (no blending, ~30% faster). */ template DISPENSO_INLINE Flt cos(Flt x) { assert_float_type(); if constexpr (!std::is_same_v>) { return cos, AccuracyTraits>(SimdType_t(x)).v; } else if constexpr (std::is_same_v) { // Scalar: pi/4 reduction + branch selects sin or cos polynomial per quadrant. Flt j; if constexpr (AccuracyTraits::kMaxAccuracy) { constexpr float kPi_2hi = 1.57079625f; constexpr float kPi_2medh = 7.54978942e-08f; constexpr float kPi_2medl = 5.39030253e-15f; constexpr float kPi_2lo = 3.28200367e-22f; constexpr float k2_pi = 0.636619747f; j = detail::rangeReduce3(x, k2_pi, kPi_2hi, kPi_2medh, kPi_2medl, kPi_2lo); } else { constexpr float kPi_2hi = 1.57079625f; constexpr float kPi_2med = 7.54978942e-08f; constexpr float kPi_2lo = 5.39032794e-15f; constexpr float k2_pi = 0.636619747f; j = detail::rangeReduce2(x, k2_pi, kPi_2hi, kPi_2med, kPi_2lo); } return detail::sincos_pi_impl(x, j, 1); } else { // SIMD: offset-pi reduction maps cos zeros to x_r = 0, then evaluates sin_pi_2. auto fma = FloatTraits::fma; Flt y = fabs(x); constexpr float k1_pi = 0.318309886f; auto qi = convert_to_int_trunc(y * k1_pi); qi = qi + qi + 1; Flt qf = Flt(qi); if constexpr (AccuracyTraits::kMaxAccuracy) { constexpr float kPi_2hi = 1.57079625f; constexpr float kPi_2medh = 7.54978942e-08f; constexpr float kPi_2medl = 5.39030253e-15f; constexpr float kPi_2lo = 3.28200367e-22f; y = fma(qf, Flt(-kPi_2hi), y); y = fma(qf, Flt(-kPi_2medh), y); y = fma(qf, Flt(-kPi_2medl), y); y = fma(qf, Flt(-kPi_2lo), y); } else { constexpr float kPi_2hi = 1.57079625f; constexpr float kPi_2med = 7.54978942e-08f; constexpr float kPi_2lo = 5.39032794e-15f; y = fma(qf, Flt(-kPi_2hi), y); y = fma(qf, Flt(-kPi_2med), y); y = fma(qf, Flt(-kPi_2lo), y); } return detail::cos_offset_pi_reduction(y, qi); } } /** * @brief Simultaneous sine and cosine, sharing range reduction. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Same behavior as sin/cos: kMaxAccuracy controls * range reduction precision. Default: 2 ULP each. * @param x Input value in radians (all float domain). * @param[out] out_sin Pointer to receive sin(x). * @param[out] out_cos Pointer to receive cos(x). * @note Can be faster than separate sin() + cos() calls due to shared range * reduction and polynomial evaluation (SIMD). Compatible with all SIMD backends. 
*/ template DISPENSO_INLINE void sincos(Flt x, Flt* out_sin, Flt* out_cos) { assert_float_type(); if constexpr (!std::is_same_v>) { SimdType_t s, c; sincos, AccuracyTraits>(SimdType_t(x), &s, &c); *out_sin = s.v; *out_cos = c.v; } else { Flt j; if constexpr (AccuracyTraits::kMaxAccuracy) { constexpr float kPi_2hi = 1.57079625f; constexpr float kPi_2medh = 7.54978942e-08f; constexpr float kPi_2medl = 5.39030253e-15f; constexpr float kPi_2lo = 3.28200367e-22f; constexpr float k2_pi = 0.636619747f; j = detail::rangeReduce3(x, k2_pi, kPi_2hi, kPi_2medh, kPi_2medl, kPi_2lo); } else { constexpr float kPi_2hi = 1.57079625f; constexpr float kPi_2med = 7.54978942e-08f; constexpr float kPi_2lo = 5.39032794e-15f; constexpr float k2_pi = 0.636619747f; j = detail::rangeReduce2(x, k2_pi, kPi_2hi, kPi_2med, kPi_2lo); } detail::sincos_pi_impl_both(x, j, out_sin, out_cos); } } /** * @brief Compute sin(pi * x) with exact range reduction. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Accepted but ignored (range reduction is always exact). * @param x Input value (all float domain). Exact at integers (returns 0) and * half-integers (returns ±1). ~2 ULP for general inputs. * @return sin(pi * x). Compatible with all SIMD backends. */ template DISPENSO_INLINE Flt sinpi(Flt x) { assert_float_type(); if constexpr (!std::is_same_v>) { return sinpi>(SimdType_t(x)).v; } else { // Exact range reduction: j = round(2*x), r = x - j/2, t = r * pi. constexpr float kMagic = FloatTraits::kMagic; // 1.5 * 2^23 Flt j = (2.0f * x + kMagic) - kMagic; // round(2*x) Flt r = x - j * 0.5f; // exact for |x| < 2^22 Flt t = r * kPi; // only error source return detail::sincos_pi_impl(t, j, 0); } } /** * @brief Compute cos(pi * x) with exact range reduction. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Accepted but ignored (range reduction is always exact). * @param x Input value (all float domain). Exact at integers (returns ±1) and * half-integers (returns 0). ~2 ULP for general inputs. * @return cos(pi * x). Compatible with all SIMD backends. */ template DISPENSO_INLINE Flt cospi(Flt x) { assert_float_type(); if constexpr (!std::is_same_v>) { return cospi>(SimdType_t(x)).v; } else { constexpr float kMagic = FloatTraits::kMagic; Flt j = (2.0f * x + kMagic) - kMagic; Flt r = x - j * 0.5f; Flt t = r * kPi; return detail::sincos_pi_impl(t, j, 1); } } /** * @brief Simultaneous sin(pi*x) and cos(pi*x) with exact range reduction. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Accepted but ignored. * @param x Input value (all float domain). * @param[out] out_sin Pointer to receive sin(pi * x). * @param[out] out_cos Pointer to receive cos(pi * x). * @note Faster than separate sinpi() + cospi() calls. ~2 ULP each. * Compatible with all SIMD backends. */ template DISPENSO_INLINE void sincospi(Flt x, Flt* out_sin, Flt* out_cos) { assert_float_type(); if constexpr (!std::is_same_v>) { SimdType_t s, c; sincospi>(SimdType_t(x), &s, &c); *out_sin = s.v; *out_cos = c.v; } else { constexpr float kMagic = FloatTraits::kMagic; Flt j = (2.0f * x + kMagic) - kMagic; Flt r = x - j * 0.5f; Flt t = r * kPi; detail::sincos_pi_impl_both(t, j, out_sin, out_cos); } } /** * @brief Tangent approximation. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Default: 3 ULP in [-256K pi, 256K pi]. * kMaxAccuracy uses 4-part Cody-Waite reduction for wider accurate range. * @param x Input value in radians; undefined at odd multiples of pi/2. * @return Tangent of @p x. Compatible with all SIMD backends. 
*/ template DISPENSO_INLINE Flt tan(Flt x) { assert_float_type(); if constexpr (!std::is_same_v>) { return tan, AccuracyTraits>(SimdType_t(x)).v; } else { Flt j; if constexpr (AccuracyTraits::kMaxAccuracy) { constexpr float kPi_2hi = 1.57079625f; constexpr float kPi_2medh = 7.54978942e-08f; constexpr float kPi_2medl = 5.39030253e-15f; constexpr float kPi_2lo = 3.28200367e-22f; constexpr float k2_pi = 0.636619747f; j = detail::rangeReduce3(x, k2_pi, kPi_2hi, kPi_2medh, kPi_2medl, kPi_2lo); } else { constexpr float kPi_2hi = 1.57079625f; constexpr float kPi_2med = 7.54978942e-08f; constexpr float kPi_2lo = 5.39032794e-15f; constexpr float k2_pi = 0.636619747f; j = detail::rangeReduce2(x, k2_pi, kPi_2hi, kPi_2med, kPi_2lo); } return detail::tan_pi_2_impl(x, j); } } /** * @brief Arc tangent approximation. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Default: 3 ULP. kMaxAccuracy: 2 ULP (degree-11 polynomial). * @param x Input value (all float domain). * @return Arc tangent of @p x in radians. Compatible with all SIMD backends. */ template DISPENSO_INLINE Flt atan(Flt x) { assert_float_type(); if constexpr (!std::is_same_v>) { return atan, AccuracyTraits>(SimdType_t(x)).v; } else { using IntT = IntType_t; IntT xi = bit_cast(x); IntT sgnbit = xi & 0x80000000; xi &= 0x7fffffff; x = bit_cast(xi); auto flip = x > 1.0f; // TODO(bbudge): Verify, Some compilers (MSVC) don't optimize this as well, check for scalar // case. if constexpr (std::is_same_v) { if (flip) { x = 1.0f / x; } } else { x = FloatTraits::conditional(flip, 1.0f / x, x); } auto y = detail::atan_poly(x); y = FloatTraits::conditional(flip, kPi_2 - y, y); y = bit_cast(bit_cast(y) | sgnbit); return y; } } /** * @brief Base-2 exponential approximation. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Default: 1 ULP. kMaxAccuracy has no additional effect. * kBoundsValues: returns NaN for NaN, clamps input to avoid overflow. * @param x Input value; [-127, 128] for normal output. * @return 2^x. Compatible with all SIMD backends. */ template DISPENSO_INLINE Flt exp2(Flt x) { assert_float_type(); if constexpr (!std::is_same_v>) { return exp2, AccuracyTraits>(SimdType_t(x)).v; } else { using IntT = IntType_t; Flt xf; IntT xi; if constexpr (AccuracyTraits::kBoundsValues) { if constexpr (std::is_same_v) { if (!(x == x)) { return x; } x = clamp_allow_nan(x, -127.0f, 128.0f); xf = floor_small(x); xi = convert_to_int(xf); } else { // bool_as_mask converts any comparison result (lane-wide or __mmask16) // to IntType uniformly across all SIMD backends. IntType_t nonanmask = bool_as_mask>(x == x); x = clamp_allow_nan(x, Flt(-127.0f), Flt(128.0f)); xf = floor_small(x); xi = nonanmask & convert_to_int(xf); } } else { xf = floor_small(x); xi = convert_to_int(xf); } x -= xf; xi += 127; xi <<= 23; Flt powxi = bit_cast(xi); Flt y = dispenso::fast_math::hornerEval( x, 0x1.c41242p-13f, 0x1.4748aep-10f, 0x1.3cfb6p-7f, 0x1.c68b2ep-5f, 0x1.ebfd38p-3f, 0x1.62e42cp-1f, 0x1p0f); return powxi * y; } } /** * @brief Natural exponential approximation. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Default: 3 ULP. MaxAccuracy: 1 ULP (Sollya-optimized * polynomial + Norbert Juffa's rounding technique). * kBoundsValues: returns 0 for large negative, inf for large positive, NaN for NaN. * @param x Input value; [-89, 89] for normal output. * @return e^x. Compatible with all SIMD backends. 
*/ template DISPENSO_INLINE Flt exp(Flt x) { assert_float_type(); if constexpr (!std::is_same_v>) { return exp, AccuracyTraits>(SimdType_t(x)).v; } else { using IntT = IntType_t; using UintT = UintType_t; constexpr float k1_ln2 = k1_Ln2; constexpr float kLn2hi = 6.93145752e-1f; constexpr float kLn2lo = 1.42860677e-6f; if constexpr (AccuracyTraits::kMaxAccuracy || AccuracyTraits::kBoundsValues) { // Rounding algorithm courtesy Norbert Juffa, https://stackoverflow.com/a/40519989/830441 // exp(x) = 2**i * exp(f); i = rintf (x / log(2)) Flt f = x; Flt j = detail::rangeReduce(f, k1_ln2, kLn2hi, kLn2lo); IntT i = convert_to_int(j); // approximate r = exp(f) on interval [-log(2)/2, +log(2)/2] Flt y = dispenso::fast_math::hornerEval( f, 0x1.6850e4p-10f, 0x1.123bccp-7f, 0x1.555b98p-5f, 0x1.55548ep-3f, 0x1.fffff8p-2f, 1.f, 1.f); // exp(x) = 2**i * y (with rounding) auto ipos = i > IntT(0); UintT ia = FloatTraits::conditional(ipos, UintT(0u), UintT(0x83000000u)); Flt s = bit_cast(UintT(0x7f000000u) + ia); Flt t = bit_cast((UintT(i) << 23) - ia); y = y * s; y = y * t; if constexpr (AccuracyTraits::kBoundsValues) { if constexpr (std::is_same_v) { auto zero = (x < -89.0f); auto overflow = (x > 89.0f); if ((zero || overflow)) { Flt orbits = bit_cast(bool_apply_or_zero(overflow, 0x7f800000)); y = FloatTraits::conditional(zero | overflow, orbits, y); } } else { auto zero = (x < -89.0f); auto overflow = (x > 89.0f); IntT orbits = bool_apply_or_zero(overflow, IntT(0x7f800000)); auto mask = bool_as_mask(zero) | bool_as_mask(overflow); y = FloatTraits::conditional(mask, bit_cast(orbits), y); } } return y; } else { Flt j = detail::rangeReduceFloor(x, k1_ln2, kLn2hi, kLn2lo); IntT xi = convert_to_int(j); xi += 127; xi <<= 23; Flt powxi = bit_cast(xi); // approximate r = exp(f) on interval [0, ln2] Flt y = dispenso::fast_math::hornerEval( x, 0x1.8014dp-7f, 0x1.3f3846p-5f, 0x1.57481ep-3f, 0x1.ffd96ep-2f, 0x1.000086p0f, 0x1.fffffep-1f); return powxi * y; } } } /** * @brief Base-10 exponential approximation. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Default: 2 ULP. kBoundsValues: handles NaN. * @param x Input value; [-38, 38] for normal output. * @return 10^x. Compatible with all SIMD backends. */ template DISPENSO_INLINE Flt exp10(Flt x) { assert_float_type(); if constexpr (!std::is_same_v>) { return exp10, AccuracyTraits>(SimdType_t(x)).v; } else { using IntT = IntType_t; constexpr double kLog10of2d = 0.3010299956639812; constexpr float k1_log10of2 = static_cast(1.0 / kLog10of2d); constexpr float kLog10of2 = static_cast(kLog10of2d); constexpr float kLog10of2lo = static_cast(kLog10of2d - kLog10of2); Flt j; IntT xi; if constexpr (AccuracyTraits::kBoundsValues) { IntType_t nonanmask = bool_as_mask>(x == x); x = clamp_allow_nan(x, Flt(-38.23080944932561f), Flt(38.53183944498959f)); j = detail::rangeReduceFloor(x, k1_log10of2, kLog10of2, kLog10of2lo); xi = nonanmask & convert_to_int(j); } else { j = detail::rangeReduceFloor(x, k1_log10of2, kLog10of2, kLog10of2lo); xi = convert_to_int(j); } xi += 127; xi <<= 23; Flt powxi = bit_cast(xi); Flt y = dispenso::fast_math::hornerEval( x, 0x1.293a54p-2f, 0x1.024feap-1f, 0x1.2d9da2p0f, 0x1.0459f6p1f, 0x1.53534cp1f, 0x1.26bb18p1f, 0x1p0f); return powxi * y; } } /** * @brief Base-2 logarithm approximation. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Default: 1 ULP. kMaxAccuracy handles denormals correctly. * kBoundsValues: returns -inf for 0, NaN for negative, +inf for +inf. * @param x Input value in (0, +inf). * @return log2(x). 
Compatible with all SIMD backends. */ template DISPENSO_INLINE Flt log2(Flt x) { assert_float_type(); if constexpr (!std::is_same_v>) { return log2, AccuracyTraits>(SimdType_t(x)).v; } else { auto [xi, i, m] = detail::logarithmSep(x); m = m - 1.0f; // Compute log2(1+m) for m in [sqrt(0.5)-1, sqrt(2.0)-1] // Degree-9 polynomial with c0=0 absorbed: the Horner chain evaluates c9..c1, // then fma(y, m, m) applies the final y*m + m (since c0=0, c1 is folded into +m). Flt y = dispenso::fast_math::hornerEval( m, -1.09985352e-1f, 1.86182275e-1f, -1.91066533e-1f, 2.04593703e-1f, -2.39627063e-1f, 2.88573444e-1f, -3.60695332e-1f, 4.80897635e-1f, -7.21347392e-1f, 4.42695051e-1f); y = FloatTraits::fma(y, m, m); // c0=0 absorbed: y*m + m y = y + i; /* Check for and handle special cases */ if constexpr (AccuracyTraits::kBoundsValues) { y = detail::logarithmBounds(x, y, xi); } return y; } } /** * @brief Natural logarithm approximation. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Default: 2 ULP. kMaxAccuracy handles denormals correctly. * kBoundsValues: returns -inf for 0, NaN for negative, +inf for +inf. * @param x Input value in (0, +inf). * @return ln(x). Compatible with all SIMD backends. */ template DISPENSO_INLINE Flt log(Flt x) { assert_float_type(); if constexpr (!std::is_same_v>) { return log, AccuracyTraits>(SimdType_t(x)).v; } else { auto [xi, i, m] = detail::logarithmSep(x); m = m - 1.0f; // Compute log(1+m) for m in [sqrt(0.5)-1, sqrt(2.0)-1] // Degree-9 polynomial with c0=0, c1=1 absorbed: y = P(m)*m² + m. // P(m) evaluates c9..c2 (8 coefficients), then fma(P, m², m) applies c1=1 and c0=0. Flt y = dispenso::fast_math::hornerEval( m, 0.0924733654f, -0.14482744f, 0.148145974f, -0.165455937f, 0.199700251f, -0.250024557f, 0.333337069f, -0.499999911f); y = FloatTraits::fma(y, m * m, m); // c0=0, c1=1 absorbed y = FloatTraits::fma(i, kLn2, y); /* Check for and handle special cases */ if constexpr (AccuracyTraits::kBoundsValues) { y = detail::logarithmBounds(x, y, xi); } return y; } } /** * @brief Base-10 logarithm approximation. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Default: 3 ULP. kMaxAccuracy handles denormals correctly. * kBoundsValues: returns -inf for 0, NaN for negative, +inf for +inf. * @param x Input value in (0, +inf). * @return log10(x). Compatible with all SIMD backends. */ template DISPENSO_INLINE Flt log10(Flt x) { assert_float_type(); if constexpr (!std::is_same_v>) { return log10, AccuracyTraits>(SimdType_t(x)).v; } else { auto [xi, i, m] = detail::logarithmSep(x); m = m - 1.0f; // Compute log10(1+m) for m in [sqrt(0.5)-1, sqrt(2.0)-1] auto fma = FloatTraits::fma; // Degree-9 polynomial with c0=0 absorbed: y = P(m) * m, where // P(m) = c9*m^8 + ... + c1 (9 coefficients). Flt y = dispenso::fast_math::hornerEval( m, 0.0326662175f, -0.0601400957f, 0.0659840927f, -0.0723559037f, 0.086600922f, -0.108560629f, 0.144770443f, -0.217147425f, 0.434294462f) * m; constexpr float kLog10of2 = 0.30102999566398f; y = fma(i, kLog10of2, y); /* Check for and handle special cases */ if constexpr (AccuracyTraits::kBoundsValues) { y = detail::logarithmBounds(x, y, xi); } return y; } } /** * @brief Two-argument arc tangent approximation. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Default: 3 ULP. kMaxAccuracy: 2 ULP (degree-11 polynomial). * kBoundsValues: handles the case where both arguments are inf. * @param y Y coordinate (all float domain). * @param x X coordinate (all float domain). * @return atan2(y, x) in radians. 
Compatible with all SIMD backends. */ template DISPENSO_INLINE Flt atan2(Flt y, Flt x) { assert_float_type(); if constexpr (!std::is_same_v>) { return atan2, AccuracyTraits>(SimdType_t(y), SimdType_t(x)).v; } else { using UintT = UintType_t; UintT yi = bit_cast(y); UintT xi = bit_cast(x); auto someNonzero = ((yi | xi) & 0x7fffffff) != 0; UintT xsgn = xi & 0x80000000; UintT ysgn = yi & 0x80000000; UintT allsgn = xsgn ^ ysgn; yi &= 0x7fffffff; xi &= 0x7fffffff; auto flip = yi > xi; if constexpr (AccuracyTraits::kBoundsValues) { using IntT = IntType_t; auto yinf = yi == UintT(0x7f800000u); auto bothinf = (yinf & (xi == UintT(0x7f800000u))); auto bothinfi = bool_as_mask(bothinf); yi = FloatTraits::conditional(bothinfi, UintT(0x7f7fffffu), yi); xi = FloatTraits::conditional(bothinfi, UintT(0x7f7fffffu), xi); // keepsgn = NOT(yinf AND NOT bothinf) = bothinf OR NOT yinf. auto keepsgn = !((!bothinf) & yinf); xsgn &= bool_as_mask(keepsgn); } Flt den = bit_cast(FloatTraits::conditional(flip, yi, xi)); Flt num = bit_cast(FloatTraits::conditional(flip, xi, yi)); Flt y_x = bool_apply_or_zero(someNonzero, num / den); auto z = detail::atan_poly(y_x); z = FloatTraits::conditional(flip, kPi_2 - z, z); z = bit_cast(bit_cast(z) | allsgn); // Branchless offset: π where x<0, 0 where x≥0, with y's sign applied. UintT x_neg_mask = UintT(0) - (xsgn >> 31); Flt pi_or_zero = bit_cast(x_neg_mask & bit_cast(Flt(kPi))); Flt offset = bit_cast(bit_cast(pi_or_zero) | ysgn); return z + offset; } } /** * @brief Hypotenuse: sqrt(x*x + y*y) with overflow protection. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Default: ~1 ULP for all normal floats. * kBoundsValues: handles inf/NaN per IEEE 754 (hypot(inf,NaN) = +inf). * kMaxAccuracy: accepted but has no additional effect. * @param x First input (all float domain). * @param y Second input (all float domain). * @return sqrt(x*x + y*y). Compatible with all SIMD backends. */ template DISPENSO_INLINE Flt hypot(Flt x, Flt y) { assert_float_type(); if constexpr (!std::is_same_v>) { return hypot, AccuracyTraits>(SimdType_t(x), SimdType_t(y)).v; } else if constexpr (std::is_same_v) { if constexpr (AccuracyTraits::kBoundsValues) { uint32_t ax = bit_cast(x) & 0x7fffffffu; uint32_t ay = bit_cast(y) & 0x7fffffffu; if (DISPENSO_EXPECT(ax == 0x7f800000u || ay == 0x7f800000u, 0)) { return std::numeric_limits::infinity(); } if (DISPENSO_EXPECT(ax > 0x7f800000u || ay > 0x7f800000u, 0)) { return std::numeric_limits::quiet_NaN(); } } // Scalar: cast to double for correctly-rounded result. double xd = static_cast(x); double yd = static_cast(y); return static_cast(std::sqrt(std::fma(xd, xd, yd * yd))); } else { // SIMD: dynamically scale inputs based on their exponent to keep // intermediates in normal float range. Extracts the exponent of // max(|x|,|y|), builds scale = 2^(127-E) so the scaled max is in [1,2). // Clamp ensures the scale itself stays in normal range. using IntT = IntType_t; using UintT = UintType_t; Flt abs_mask = bit_cast(UintT(0x7fffffffu)); Flt max_abs = FloatTraits::max(x & abs_mask, y & abs_mask); // Clamp to [FLT_MIN, 2^126] so scale and unscale stay normal. Flt clamped = FloatTraits::max(FloatTraits::min(max_abs, Flt(0x1p126f)), Flt(0x1p-126f)); IntT clamped_exp = bit_cast(clamped) & IntT(0x7f800000); // scale = 2^(127-E), unscale = 2^(E-127). 
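// Illustrative expansion of the exponent arithmetic below (not part of the
// library). If clamped has biased exponent e (clamped_exp = e << 23), then
//
//   0x7f000000 - (e << 23) = (254 - e) << 23,
//
// which reinterpreted as a float is 2^((254 - e) - 127) = 2^(127 - e): the
// exact reciprocal of clamped's power of two. E.g. for max(|x|,|y|) ≈ 1e20f
// (e = 193), scale = 2^-66 and unscale = 2^66, so the larger of |xs|, |ys|
// lands in [1, 2) and the final h * unscale restores the true magnitude.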
Flt scale = bit_cast(IntT(0x7f000000) - clamped_exp); Flt xs = x * scale; Flt ys = y * scale; auto fma = FloatTraits::fma; Flt h = FloatTraits::sqrt(fma(xs, xs, ys * ys)); // Newton refinement: h' = h + (xs²+ys²-h²) * rcp(2h) Flt r = fma(xs, xs, fma(ys, ys, -(h * h))); Flt rcp_2h = FloatTraits::rcp(h + h + Flt(std::numeric_limits::min())); h = fma(r, rcp_2h, h); Flt unscale = bit_cast(clamped_exp); h = h * unscale; if constexpr (AccuracyTraits::kBoundsValues) { // IEEE 754: hypot(±inf, y) = +inf even when y is NaN. // Inf inputs produce NaN through Newton refinement, so override here. UintT ax = bit_cast(x) & UintT(0x7fffffffu); UintT ay = bit_cast(y) & UintT(0x7fffffffu); auto either_inf = (ax == UintT(0x7f800000u)) | (ay == UintT(0x7f800000u)); auto inf_mask = bool_as_mask(either_inf); h = FloatTraits::conditional(inf_mask, bit_cast(UintT(0x7f800000u)), h); } return h; } } /** * @brief Power function: x^y. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Default: ~1 ULP for scalar float (double-precision core), * error scales as |y*log2(x)|*ln2 ULP for SIMD (~1-2 for moderate y). * kMaxAccuracy: uses extended-precision log2 for ~1-2 ULP even for large y (SIMD only; * scalar float always uses the double-precision core which subsumes this). * kBoundsValues: full IEEE 754 special-case handling (NaN, Inf, zero, negative bases). * @param x Base (all float domain, including negative). * @param y Exponent (SIMD vector). * @return x^y. Negative x: returns -|x|^y for odd integer y, NaN for non-integer y. * Compatible with all SIMD backends. * * Scalar float uses a table-based double-precision core (~2.5ns). * SIMD uses float-precision exp2/log2 with FMA error-free transform. */ template DISPENSO_INLINE Flt pow(Flt x, NonDeduced y) { assert_float_type(); if constexpr (!std::is_same_v>) { return pow, AccuracyTraits>(SimdType_t(x), SimdType_t(y)).v; } else if constexpr (std::is_same_v) { // Scalar float: double-precision pow core (table-based log2 + exp2). // Uses integer checkint() for parity (~5 insns vs ~40 for float_is_int/odd). // Branchless sign XOR. Table entry i=9 is {1.0, 0.0} so pow(1,y) = 1 exactly. uint32_t ix = bit_cast(x); uint32_t iy = bit_cast(y); // sign_bias is 0x80000000u for odd-integer y with negative x, else 0. // Always XORed into the result (no-op when 0). uint32_t sign_bias = 0; // neg_nan: set if negative x with non-integer y (domain error → NaN). // Only used by the non-bounds path (deferred after core computation). uint32_t neg_nan = 0; if constexpr (AccuracyTraits::kBoundsValues) { // Combined fast-path test: detects any x or y that needs special handling. // First term: ix - 0x00800000 >= 0x7F000000 is true when ix < 0x00800000 // (zero, subnormal) or ix >= 0x7F800000 (inf, NaN) — but also when ix // has the sign bit set (negative), because unsigned subtraction wraps // to a huge value. This means negative x enters the special block too, // avoiding a separate negative-x check. // Second term: 2u*iy - 1u >= 0xFEFFFFFF tests y = ±0, ±inf, or NaN // (see bit trick comments in pow_scalar_y_special). if (DISPENSO_EXPECT( ix - 0x00800000u >= 0x7f800000u - 0x00800000u || 2u * iy - 1u >= 2u * 0x7f800000u - 1u, 0)) { float special; if (2u * iy - 1u >= 2u * 0x7f800000u - 1u) { detail::pow_scalar_y_special(x, y, ix, iy, &special); return special; } if (detail::pow_scalar_x_special(ix, iy, sign_bias, &special)) return special; // Fall through to core computation with adjusted ix. 
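// Illustrative values for the combined fast-path test at the top of this
// block (not part of the library):
//
//   x =  1.5f → ix = 0x3FC00000, ix - 0x00800000 = 0x3F400000 < 0x7F000000
//               → ordinary positive x, the special block is skipped.
//   x = -2.0f → ix = 0xC0000000, ix - 0x00800000 = 0xBF800000 ≥ 0x7F000000
//               → the sign bit makes the unsigned value large, routing
//                 negative x in here without a separate sign test.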
} } else { // Non-bounds path: one branch for the common case (positive x), // branchless checkint in the else (rare negative-x path avoids further mispredicts). if (DISPENSO_EXPECT(!(ix & 0x80000000u), 1)) { // Positive x: nothing to do. } else { ix &= 0x7fffffffu; int32_t yint = detail::checkint_branchless(iy); sign_bias = static_cast(yint == 2) * 0x80000000u; neg_nan = static_cast(yint == 0); } } float r; if constexpr (AccuracyTraits::kMaxAccuracy) { // Tableless core: logarithmSep handles subnormals internally (pre-scales // by 2^23), so pass the original |x| — not the bounds-adjusted ix. r = detail::pow_double_poly_core(std::fabs(x), y); } else { // Table core: uses ix directly. The bounds path above already normalized // subnormals (scale by 2^23, subtract 23 from exponent) so ix is valid. r = detail::pow_double_core(bit_cast(ix), y); } // Branchless sign flip (no-op when sign_bias == 0). r = bit_cast(bit_cast(r) ^ sign_bias); // Non-bounds: apply deferred NaN for neg^non-int, and pow(x,0)=1. if constexpr (!AccuracyTraits::kBoundsValues) { if (neg_nan) r = std::numeric_limits::quiet_NaN(); if (y == 0.0f) r = 1.0f; } return r; } else { // SIMD path: double-precision core for all accuracy levels (~1 ULP). using UintT = UintType_t; Flt ax = fabs(x); Flt r; // Width-dependent dispatch: table gathers are cheap at 4 lanes but // scale poorly; polynomial cost amortizes over more lanes. if constexpr (sizeof(Flt) <= 16) { r = detail::pow_double_hybrid_core(ax, y); } else { r = detail::pow_double_poly_core(ax, y); } if constexpr (AccuracyTraits::kBoundsValues) { detail::pow_simd_special_fixup(ax, y, r); } // Sign handling for negative x (SIMD, branchless per-lane): // The double core computes pow(|x|, y). For negative x we need: // - odd integer y: negate result (e.g. pow(-2, 3) = -8) // - even integer y: keep positive (e.g. pow(-2, 2) = 4) // - non-integer y: result is NaN (domain error) // float_is_int/float_is_odd test this via mantissa bit inspection // (branchless SIMD comparisons, no scalar checkint needed). UintT xsign = bit_cast(x) & UintT(0x80000000u); auto negx = x < 0.0f; auto y_int = float_is_int(y); auto y_odd = float_is_odd(y); // sign_mask is 0x80000000 in lanes where x < 0 AND y is odd, else 0. // XORing into r negates exactly those lanes. UintT sign_mask = xsign & bool_as_mask(y_odd); r = bit_cast(bit_cast(r) ^ sign_mask); // Replace with NaN where x < 0 and y is not an integer. constexpr float kNaN = std::numeric_limits::quiet_NaN(); r = FloatTraits::conditional(negx & !y_int, Flt(kNaN), r); // pow(x, 0) = 1 for all x (IEEE 754). r = FloatTraits::conditional(y == 0.0f, Flt(1.0f), r); if constexpr (AccuracyTraits::kBoundsValues) { // pow(1, y) = 1 even for NaN y. r = FloatTraits::conditional(x == 1.0f, Flt(1.0f), r); // pow(-1, ±inf) = 1. constexpr float kInf = std::numeric_limits::infinity(); auto x_neg1 = (x == -1.0f); auto y_inf = fabs(y) == Flt(kInf); r = FloatTraits::conditional(x_neg1 & y_inf, Flt(1.0f), r); } return r; } } /** * @brief Power function with scalar exponent: x^y. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Same as pow(Flt, Flt). * @param x Base (all float domain, including negative). * @param y Scalar exponent — enables scalar branching optimizations. * @return x^y. Same semantics as pow(Flt, Flt). * Compatible with all SIMD backends. * * Scalar y enables fast paths: y=0→1, y=1→x, y=-1→1/x, y=0.5→sqrt, * y=2→x*x, integer y→binary squaring. General case uses exp2/log2. 
*/ template < typename Flt, typename AccuracyTraits = DefaultAccuracyTraits, typename = std::enable_if_t>> DISPENSO_INLINE Flt pow(Flt x, float y) { assert_float_type(); if constexpr (!std::is_same_v>) { return pow, AccuracyTraits>(SimdType_t(x), y).v; } else { using UintT = UintType_t; // Scalar fast paths: y=0→1, y=1→x, y=-1→1/x, y=0.5→sqrt, y=2→x*x, // small integer y → binary squaring. bool y_is_int = false, y_is_odd = false; Flt fast_result; if (detail::pow_scalar_y_fast_path(x, y, y_is_int, y_is_odd, fast_result)) return fast_result; Flt ax = fabs(x); Flt r; Flt yv(y); // Double-precision core for all SIMD accuracy levels (~1 ULP). if constexpr (sizeof(Flt) <= 16) { r = detail::pow_double_hybrid_core(ax, yv); } else { r = detail::pow_double_poly_core(ax, yv); } if constexpr (AccuracyTraits::kBoundsValues) { detail::pow_simd_special_fixup(ax, yv, r); } // Sign handling for negative x. if (y_is_odd) { UintT xsign = bit_cast(x) & UintT(0x80000000u); r = bit_cast(bit_cast(r) ^ xsign); } if (!y_is_int) { auto negx = x < 0.0f; constexpr float kNaN = std::numeric_limits::quiet_NaN(); r = FloatTraits::conditional(negx, Flt(kNaN), r); } if constexpr (AccuracyTraits::kBoundsValues) { r = FloatTraits::conditional(x == 1.0f, Flt(1.0f), r); constexpr float kInf = std::numeric_limits::infinity(); if (y == kInf || y == -kInf) { auto x_neg1 = (x == -1.0f); r = FloatTraits::conditional(x_neg1, Flt(1.0f), r); } } return r; } } /** * @brief Compute exp(x) - 1 with precision near zero. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Default: ~2 ULP (1-step CW + degree-4 poly). * MaxAccuracy: 1 ULP (2-step CW + degree-5 poly). * kBoundsValues: 1 ULP + handles NaN/inf. * @param x Input value (all float domain). * @return exp(x) - 1. Compatible with all SIMD backends. * * Uses Cody-Waite range reduction: x = n*ln2 + r, |r| <= ln2/2. * Then expm1(x) = 2^n * expm1(r) + (2^n - 1), where expm1(r) is computed * via a Sollya fpminimax polynomial. Avoids the catastrophic cancellation of * exp(x) - 1 near zero, and provides uniform accuracy across the entire * domain (no polynomial/fallback transition). */ template DISPENSO_INLINE Flt expm1(Flt x) { assert_float_type(); if constexpr (!std::is_same_v>) { return expm1, AccuracyTraits>(SimdType_t(x)).v; } else { using IntT = IntType_t; using UintT = UintType_t; auto fma_fn = FloatTraits::fma; // Cody-Waite range reduction: x = n*ln2 + r, |r| <= ln2/2. // 2-step CW is required for all accuracy levels because the // reconstruction 2^n * expm1(r) amplifies reduction error by 2^n. constexpr float k1_ln2 = k1_Ln2; constexpr float kLn2hi = 6.93145752e-1f; constexpr float kLn2lo = 1.42860677e-6f; Flt r = x; Flt jf = detail::rangeReduce(r, k1_ln2, kLn2hi, kLn2lo); IntT n = convert_to_int(jf); Flt em1_r; if constexpr (AccuracyTraits::kMaxAccuracy || AccuracyTraits::kBoundsValues) { // Degree-5 Sollya fpminimax((exp(x)-1-x)/x^2, 5, [|SG...|], [-ln2/2, ln2/2]): // sup-norm error < 2^-29 (~0.03 ULP at r = ln2/2). Flt p = dispenso::fast_math::hornerEval( r, 0x1.a26762p-13f, 0x1.6d2ep-10f, 0x1.110ff2p-7f, 0x1.555502p-5f, 0x1.555556p-3f, 0x1p-1f); em1_r = fma_fn(p, r * r, r); } else { // Degree-4 Sollya fpminimax((exp(x)-1-x)/x^2, 4, [|SG...|], [-ln2/2, ln2/2]): // sup-norm error < 2^-24 (~1.2 ULP at r = ln2/2). Flt p = dispenso::fast_math::hornerEval( r, 0x1.6ca992p-10f, 0x1.120abep-7f, 0x1.55556cp-5f, 0x1.5554dep-3f, 0x1p-1f); em1_r = fma_fn(p, r * r, r); } // Reconstruction: expm1(x) = 2^n * expm1(r) + (2^n - 1). // 2^n is exact for integer n. 
2^n - 1 is exact for |n| <= 23. // For |n| > 24, 2^n - 1 = 2^n in float, so expm1 ≈ exp which is correct. // Use fma for precision: fma(2^n, expm1(r), 2^n - 1). Flt two_n = bit_cast(UintT(n + 127) << 23); Flt two_n_m1 = two_n - 1.0f; Flt result = fma_fn(two_n, em1_r, two_n_m1); if constexpr (std::is_same_v) { // For n == 0 (|x| < ln2/2 ≈ 0.347): return polynomial directly. if (n == 0) return em1_r; // For x < -25*ln2 ≈ -17.3: expm1(x) rounds to -1 in float. if (x < -17.5f) return -1.0f; // For x > 89: exp(x) overflows, expm1(x) = inf. if (x > 89.0f) return std::numeric_limits::infinity(); // NaN propagation: rangeReduce maps NaN to finite r, producing a // garbage result. x - x is 0 for finite, NaN for NaN. if constexpr (AccuracyTraits::kBoundsValues) return result + (x - x); return result; } else { // SIMD: blend n==0 path (direct polynomial) with reconstruction. auto zero_n = (n == IntT(0)); result = FloatTraits::conditional(zero_n, em1_r, result); // Clamp: x < -17.5 → -1, x > 89 → inf. result = FloatTraits::conditional(x < -17.5f, Flt(-1.0f), result); constexpr float kInf = std::numeric_limits::infinity(); result = FloatTraits::conditional(x > 89.0f, Flt(kInf), result); if constexpr (AccuracyTraits::kBoundsValues) { // NaN propagation: range reduction maps NaN to finite values. auto is_nan = (bit_cast(x) & UintT(0x7fffffffu)) > UintT(0x7f800000u); result = FloatTraits::conditional(is_nan, x, result); } return result; } } } /** * @brief Compute log(1 + x) with precision near zero. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Default: ~2 ULP. kBoundsValues: handles x = -1 (→ -inf), NaN. * @param x Input value in (-1, +inf). * @return log(1 + x). Compatible with all SIMD backends. * * For |x| < 0.25: direct Sollya polynomial avoids the log() call (~1 ULP). * For |x| >= 0.25: compensated-addition trick: u = 1 + x, c = x - (u - 1) * captures the rounding error. Then log(u) is computed inline via the same * range reduction as log() (logarithmSep), and the result is log(u) + c/u. */ template DISPENSO_INLINE Flt log1p(Flt x) { assert_float_type(); if constexpr (!std::is_same_v>) { return log1p, AccuracyTraits>(SimdType_t(x)).v; } else { auto fma_fn = FloatTraits::fma; // Direct polynomial for log1p(x) on [-0.25, 0.25]: // log1p(x) = x - x²/2 + x³/3 - ... ≈ x + x² * q(x) // Sollya fpminimax((log(1+x)-x)/x^2, 6, [|SG...|], [-0.25, 0.25]): // sup-norm error < 2^-23; at x=0.25 → ~0.25 ULP polynomial error. Flt q = dispenso::fast_math::hornerEval( x, -0x1.1957b2p-3f, 0x1.3f347cp-3f, -0x1.5472d2p-3f, 0x1.98bfaap-3f, -0x1.000112p-2f, 0x1.555632p-2f, -0x1p-1f); Flt poly_r = fma_fn(q, x * x, x); // log1p(x) ≈ x + x² * q(x) if constexpr (std::is_same_v) { // Scalar: branch on magnitude. if (std::fabs(x) < 0.25f) return poly_r; } // Compensated addition: u = float(1 + x), c = rounding error. // log(1 + x) = log(u + c) = log(u) + log(1 + c/u) ≈ log(u) + c/u. Flt u = Flt(1.0f) + x; Flt c = x - (u - 1.0f); // Inline log(u): range-reduce u via logarithmSep, evaluate polynomial. auto [xi, i, m] = detail::logarithmSep(u); m = m - 1.0f; // log(1+m) polynomial for m in [sqrt(0.5)-1, sqrt(2.0)-1]. // Same coefficients as log(). Degree-9 with c0=0, c1=1 absorbed. // p(m) = m + m² * P(m). polyEval uses Estrin on CPU (~27% faster), // Horner on GPU (better for in-order pipelines). 
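// Illustration (degree-3 case, not from this header): Horner evaluates
//   fma(fma(fma(c3, m, c2), m, c1), m, c0)          -- one serial FMA chain,
// while Estrin splits the polynomial around m*m so the halves are independent:
//   m2 = m * m;  fma(m2, fma(c3, m, c2), fma(c1, m, c0))
// The two inner FMAs (and the m2 multiply) have no mutual dependencies, so an
// out-of-order CPU can overlap them, shortening the critical latency path.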
Flt inner = dispenso::fast_math::polyEval( m, 0.0924733654f, -0.14482744f, 0.148145974f, -0.165455937f, 0.199700251f, -0.250024557f, 0.333337069f, -0.499999911f); Flt y = fma_fn(inner, m * m, m); // c0=0, c1=1 absorbed y = fma_fn(i, kLn2, y); // Add compensation: log1p(x) = log(u) + c/u. Flt result = y + c / u; if constexpr (!std::is_same_v) { // SIMD: blend polynomial for small |x|, compensated log otherwise. auto small = fabs(x) < 0.25f; result = FloatTraits::conditional(small, poly_r, result); } if constexpr (AccuracyTraits::kBoundsValues) { // log1p(-1) → -inf, log1p(x < -1) → NaN, log1p(NaN) → NaN. result = detail::logarithmBounds(u, result, xi); } return result; } } /** * @brief Hyperbolic tangent approximation. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Default: ~2 ULP. kBoundsValues: handles NaN. * @param x Input value (all float domain). * @return tanh(x) in [-1, 1]. Compatible with all SIMD backends. * * tanh(x) = expm1(2x) / (expm1(2x) + 2). * The range-reduced expm1 preserves precision near zero (expm1(2x) ≈ 2x), * so this formula naturally gives tanh(x) ≈ x for small x without a * separate polynomial. Input clamped to [-10, 10] since tanh(10) = 1.0f * exactly in float, and the clamp prevents expm1 overflow for large |x|. */ template DISPENSO_INLINE Flt tanh(Flt x) { assert_float_type(); if constexpr (!std::is_same_v>) { return tanh, AccuracyTraits>(SimdType_t(x)).v; } else if constexpr (std::is_same_v) { // Scalar: Sollya fpminimax polynomial for |x| < 1.0 (degree 6 in u = x²). // tanh(x) = x * (1 + u * Q(u)), where Q is fitted to tanh(x)/x - 1. // Error: ~0.52 ULP at the boundary. 6 FMAs. float ax = std::fabs(x); if (!(ax >= 1.0f)) { // !(>=) so NaN enters polynomial path and propagates constexpr float t1 = -0x1.555482p-2f; constexpr float t2 = 0x1.10ef12p-3f; constexpr float t3 = -0x1.b66cecp-5f; constexpr float t4 = 0x1.4e3a6ap-6f; constexpr float t5 = -0x1.9c954p-8f; constexpr float t6 = 0x1.189ec2p-10f; float u = x * x; float poly = std::fma(std::fma(std::fma(std::fma(std::fma(t6, u, t5), u, t4), u, t3), u, t2), u, t1); return x * std::fma(poly, u, 1.0f); } // Large |x|: use expm1 formula (no cancellation, result far from 0). float x_safe = ax < 10.0f ? x : (x < 0.0f ? -10.0f : 10.0f); float em1 = expm1(x_safe + x_safe); return em1 / (em1 + 2.0f); } else { // SIMD: pure expm1 formula (branchless, uniform across all lanes). Flt x_safe = clamp_no_nan(x, Flt(-10.0f), Flt(10.0f)); Flt em1 = expm1(x_safe + x_safe); Flt result = em1 / (em1 + 2.0f); if constexpr (AccuracyTraits::kBoundsValues) { // NaN propagation: clamp_no_nan may map NaN to a finite value. using UintT = UintType_t; auto is_nan = (bit_cast(x) & UintT(0x7fffffffu)) > UintT(0x7f800000u); result = FloatTraits::conditional(is_nan, x, result); } return result; } } /** * @brief Error function approximation. * @tparam Flt float or SIMD float type. * @tparam AccuracyTraits Default: ~2 ULP. Not used for bounds (NaN propagates naturally). * @param x Input value (all float domain). * @return erf(x) in [-1, 1]. Compatible with all SIMD backends. * * Abramowitz & Stegun 7.1.26-inspired t-substitution with Sollya-optimized * coefficients. Uses p=0.45 (vs A&S's 0.3275911) for better polynomial fit. * * Two domains: * |x| < 0.875: erf(x) = x * (c0 + x² * Q(x²)), pure polynomial. * |x| ∈ [0.875, 3.92]: erf(x) = 1 - t·P(t)·exp(-x²), t = 1/(1+px). * |x| >= 3.92: erf(x) = ±1 (saturated in float). 
*/ template DISPENSO_INLINE Flt erf(Flt x) { assert_float_type(); // AccuracyTraits accepted for API consistency; erf uses the same polynomial for all traits. (void)sizeof(AccuracyTraits); if constexpr (!std::is_same_v>) { return erf, AccuracyTraits>(SimdType_t(x)).v; } else if constexpr (std::is_same_v) { // Scalar path: branching for efficiency. // Save sign and work with |x|; restore sign at end via bit-OR. uint32_t sign = bit_cast(x) & 0x80000000u; float ax = dispenso::fast_math::fabs(x); float result; if (ax >= 3.92f) { result = 1.0f; } else if (!(ax < 0.875f)) { // !(< ) so NaN goes to erfc path and propagates // erfc formula: erf(x) = 1 - t * P(t) * exp(-x²), t = 1/(1+p*x). constexpr float p = 0.45f; float t = 1.0f / std::fma(p, ax, 1.0f); // Sollya fpminimax degree 5 P(t), float coefficients. constexpr float c0 = 0x1.04873ep-2f; constexpr float c1 = 0x1.f81fc6p-3f; constexpr float c2 = 0x1.189f42p-2f; constexpr float c3 = 0x1.15aaa6p-5f; constexpr float c4 = 0x1.65d24ep-2f; constexpr float c5 = -0x1.4432a4p-3f; float poly = t * std::fma(std::fma(std::fma(std::fma(std::fma(c5, t, c4), t, c3), t, c2), t, c1), t, c0); // Inline exp(-x²) via Cody-Waite range reduction. float u = ax * ax; constexpr float kLog2e = 0x1.715476p+0f; constexpr float kLn2hi = 0x1.62e400p-1f; constexpr float kLn2lo = 0x1.7f7d1cp-20f; float k = std::floor(u * kLog2e); float f = std::fma(k, -kLn2hi, u); f = std::fma(k, -kLn2lo, f); // Degree-5 Horner for exp(-f), f in [0, ln2). constexpr float e0 = 0x1.fffffep-1f, e1 = -0x1.ffff1ep-1f; constexpr float e2 = 0x1.ffe314p-2f, e3 = -0x1.53f876p-3f; constexpr float e4 = 0x1.462f16p-5f, e5 = -0x1.80e5b2p-8f; float exp_neg_f = std::fma(std::fma(std::fma(std::fma(std::fma(e5, f, e4), f, e3), f, e2), f, e1), f, e0); int32_t ki = convert_to_int_trunc_safe(k); float pow2_neg_k = bit_cast((127 - ki) << 23); result = 1.0f - pow2_neg_k * exp_neg_f * poly; } else { // Near-zero: erf(x) = x * (c0 + x² * Q(x²)). // Sollya fpminimax degree 5 Q(u), float coefficients. constexpr float c0 = 0x1.20dd76p+0f; // ≈ 2/√π constexpr float q0 = -0x1.812746p-2f, q1 = 0x1.ce2ec6p-4f, q2 = -0x1.b81edep-6f; constexpr float q3 = 0x1.556b48p-8f, q4 = -0x1.b0255p-11f, q5 = 0x1.7149c8p-14f; float u = ax * ax; float q = std::fma(std::fma(std::fma(std::fma(std::fma(q5, u, q4), u, q3), u, q2), u, q1), u, q0); result = ax * std::fma(q, u, c0); } return bit_cast(bit_cast(result) | sign); } else { // SIMD: branchless, compute both paths for all lanes and blend. using UintT = UintType_t; using IntT = IntType_t; auto fma_fn = FloatTraits::fma; // clamp_allow_nan preserves NaN so it propagates through the computation. Flt ax = clamp_allow_nan(fabs(x), Flt(0.0f), Flt(3.92f)); // --- Near-zero path: erf(x) = x * (c0 + x² * Q(x²)) --- Flt u_near = ax * ax; constexpr float c0_near = 0x1.20dd76p+0f; Flt q_near = hornerEval( u_near, 0x1.7149c8p-14f, -0x1.b0255p-11f, 0x1.556b48p-8f, -0x1.b81edep-6f, 0x1.ce2ec6p-4f, -0x1.812746p-2f); Flt near_result = ax * fma_fn(q_near, u_near, Flt(c0_near)); // --- erfc path: erf(x) = 1 - t * P(t) * exp(-x²) --- constexpr float p = 0.45f; Flt denom = fma_fn(Flt(p), ax, Flt(1.0f)); Flt t = Flt(1.0f) / denom; Flt erfc_poly = t * hornerEval(t, -0x1.4432a4p-3f, 0x1.65d24ep-2f, 0x1.15aaa6p-5f, 0x1.189f42p-2f, 0x1.f81fc6p-3f, 0x1.04873ep-2f); // Inline exp(-x²) via Cody-Waite range reduction. 
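// Derivation (for reference): with k = floor(x² * log2(e)) and f = x² - k*ln2,
//   exp(-x²) = exp(-(k*ln2 + f)) = 2^-k * exp(-f),   f in [0, ln2).
// The hi/lo split of ln2 keeps f accurate (k*kLn2hi is exact for the small k
// that occur here), and 2^-k is built directly in the exponent field below as
// (127 - k) << 23.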
Flt u = ax * ax; constexpr float kLog2e = 0x1.715476p+0f; constexpr float kLn2hi = 0x1.62e400p-1f; constexpr float kLn2lo = 0x1.7f7d1cp-20f; Flt k = floor_small(u * kLog2e); Flt f = fma_fn(k, Flt(-kLn2hi), u); f = fma_fn(k, Flt(-kLn2lo), f); Flt exp_neg_f = hornerEval( f, -0x1.80e5b2p-8f, 0x1.462f16p-5f, -0x1.53f876p-3f, 0x1.ffe314p-2f, -0x1.ffff1ep-1f, 0x1.fffffep-1f); IntT ki = convert_to_int(k); Flt pow2_neg_k = bit_cast(UintT((IntT(127) - ki) << 23)); Flt erfc_result = Flt(1.0f) - pow2_neg_k * exp_neg_f * erfc_poly; // Blend: near-zero for |x| < 0.875, erfc otherwise. auto small = ax < Flt(0.875f); Flt result = FloatTraits::conditional(small, near_result, erfc_result); // Restore sign from original x (odd function). Result is non-negative, so OR stamps sign. result = bit_cast(bit_cast(result) | (bit_cast(x) & UintT(0x80000000u))); return result; } } } // namespace fast_math } // namespace dispenso ================================================ FILE: dispenso/fast_math/float_traits.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #include #include #include #if defined(__SSE__) #include #elif defined(__aarch64__) #include #endif #include namespace dispenso { namespace fast_math { // Float-precision math constants, avoiding repeated static_cast(M_PI) etc. constexpr float kPi = static_cast(M_PI); constexpr float kPi_2 = static_cast(M_PI_2); constexpr float kPi_4 = static_cast(M_PI_4); constexpr float kLn2 = static_cast(M_LN2); constexpr float k1_Ln2 = static_cast(1.0 / M_LN2); template struct FloatTraits {}; template <> struct FloatTraits { using IntType = int32_t; using UintType = uint32_t; using BoolType = bool; static constexpr uint32_t kOne = 0x3f800000; static constexpr float kMagic = 12582912.f; // 1.5 * 2**23 static constexpr bool kBoolIsMask = false; static DISPENSO_INLINE float sqrt(float x) { return std::sqrt(x); } static DISPENSO_INLINE float rcp(float x) { #if defined(__SSE__) return _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ss(x))); #elif defined(__aarch64__) return vrecpes_f32(x); #else return 1.0f / x; #endif } template static DISPENSO_INLINE Arg conditional(bool b, Arg x, Arg y) { return b ? x : y; } template static DISPENSO_INLINE Arg apply(bool b, Arg x) { // For kBoolIsMask cases, this is b & x return b * x; } static DISPENSO_INLINE float min(float a, float b) { return std::min(a, b); } static DISPENSO_INLINE float max(float a, float b) { return std::max(a, b); } static DISPENSO_INLINE float fma(float a, float b, float c) { return std::fma(a, b, c); } }; template <> struct FloatTraits { using IntType = int32_t; }; template <> struct FloatTraits { using IntType = uint32_t; }; // Non-deduced context helper: prevents template argument deduction on a parameter. // Use as function parameter type to force callers to rely on deduction from other args. template struct NonDeducedHelper { using type = T; }; template using NonDeduced = typename NonDeducedHelper::type; // Maps raw SIMD intrinsic types to their wrapper types for template deduction. // Default (identity): scalar types and wrapper types map to themselves. // Specializations in backend headers map __m128 → SseFloat, __m256 → AvxFloat, etc. // This enables fm::sin(__m128_val) to work via automatic forwarding. 
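// Usage sketch (illustrative; assumes the AVX backend header is in use so
// SimdTypeFor<__m256> resolves to AvxFloat, and that the math entry points
// follow the forwarding pattern shown for pow/expm1/tanh above):
//   __m256 v = _mm256_set1_ps(0.5f);
//   __m256 t = dispenso::fast_math::tanh(v);  // Flt deduces as __m256; the call
//                                             // re-dispatches on AvxFloat and
//                                             // returns the raw .v vector.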
template struct SimdTypeFor { using type = T; }; template using SimdType_t = typename SimdTypeFor::type; template using IntType_t = typename FloatTraits>::IntType; template using UintType_t = typename FloatTraits>::UintType; template using BoolType_t = typename FloatTraits>::BoolType; } // namespace fast_math } // namespace dispenso ================================================ FILE: dispenso/fast_math/float_traits_avx.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #if defined(__AVX2__) #include #include #include "float_traits.h" #include "util.h" namespace dispenso { namespace fast_math { struct AvxInt32; struct AvxUint32; // 8-wide float SIMD wrapper around __m256. struct AvxFloat { __m256 v; AvxFloat() = default; AvxFloat(__m256 vec) : v(vec) {} // Implicit broadcast from scalar — required for polynomial constant propagation. AvxFloat(float f) : v(_mm256_set1_ps(f)) {} // Explicit int→float conversion for (Flt)intExpr patterns. explicit AvxFloat(AvxInt32 i); // Implicit conversion back to raw intrinsic type. operator __m256() const { return v; } AvxFloat operator-() const { return _mm256_xor_ps(v, _mm256_set1_ps(-0.0f)); } AvxFloat& operator+=(AvxFloat o) { v = _mm256_add_ps(v, o.v); return *this; } AvxFloat& operator-=(AvxFloat o) { v = _mm256_sub_ps(v, o.v); return *this; } AvxFloat& operator*=(AvxFloat o) { v = _mm256_mul_ps(v, o.v); return *this; } AvxFloat& operator&=(AvxFloat o) { v = _mm256_and_ps(v, o.v); return *this; } // Arithmetic (non-member friends for symmetric implicit conversions). friend AvxFloat operator+(AvxFloat a, AvxFloat b) { return _mm256_add_ps(a.v, b.v); } friend AvxFloat operator-(AvxFloat a, AvxFloat b) { return _mm256_sub_ps(a.v, b.v); } friend AvxFloat operator*(AvxFloat a, AvxFloat b) { return _mm256_mul_ps(a.v, b.v); } friend AvxFloat operator/(AvxFloat a, AvxFloat b) { return _mm256_div_ps(a.v, b.v); } // Comparisons return AvxFloat masks (all-ones or all-zeros per lane). friend AvxFloat operator<(AvxFloat a, AvxFloat b) { return _mm256_cmp_ps(a.v, b.v, _CMP_LT_OQ); } friend AvxFloat operator>(AvxFloat a, AvxFloat b) { return _mm256_cmp_ps(a.v, b.v, _CMP_GT_OQ); } friend AvxFloat operator<=(AvxFloat a, AvxFloat b) { return _mm256_cmp_ps(a.v, b.v, _CMP_LE_OQ); } friend AvxFloat operator>=(AvxFloat a, AvxFloat b) { return _mm256_cmp_ps(a.v, b.v, _CMP_GE_OQ); } friend AvxFloat operator==(AvxFloat a, AvxFloat b) { return _mm256_cmp_ps(a.v, b.v, _CMP_EQ_OQ); } friend AvxFloat operator!=(AvxFloat a, AvxFloat b) { return _mm256_cmp_ps(a.v, b.v, _CMP_NEQ_UQ); } // Logical NOT of a comparison mask. friend AvxFloat operator!(AvxFloat a) { return _mm256_xor_ps(a.v, _mm256_castsi256_ps(_mm256_set1_epi32(-1))); } // Bitwise ops on float masks. friend AvxFloat operator&(AvxFloat a, AvxFloat b) { return _mm256_and_ps(a.v, b.v); } friend AvxFloat operator|(AvxFloat a, AvxFloat b) { return _mm256_or_ps(a.v, b.v); } friend AvxFloat operator^(AvxFloat a, AvxFloat b) { return _mm256_xor_ps(a.v, b.v); } }; // 8-wide int32 SIMD wrapper around __m256i. struct AvxInt32 { __m256i v; AvxInt32() = default; AvxInt32(__m256i vec) : v(vec) {} AvxInt32(int32_t i) : v(_mm256_set1_epi32(i)) {} // Reinterpret from AvxUint32 (no-op on __m256i). AvxInt32(AvxUint32 u); // Implicit conversion back to raw intrinsic type. 
operator __m256i() const { return v; } AvxInt32 operator-() const { return _mm256_sub_epi32(_mm256_setzero_si256(), v); } AvxInt32& operator+=(AvxInt32 o) { v = _mm256_add_epi32(v, o.v); return *this; } AvxInt32& operator-=(AvxInt32 o) { v = _mm256_sub_epi32(v, o.v); return *this; } AvxInt32& operator*=(AvxInt32 o) { v = _mm256_mullo_epi32(v, o.v); return *this; } AvxInt32& operator&=(AvxInt32 o) { v = _mm256_and_si256(v, o.v); return *this; } AvxInt32& operator|=(AvxInt32 o) { v = _mm256_or_si256(v, o.v); return *this; } // Shifts. AvxInt32 operator<<(int n) const { return _mm256_slli_epi32(v, n); } AvxInt32 operator>>(int n) const { return _mm256_srai_epi32(v, n); // Arithmetic shift right } AvxInt32& operator<<=(int n) { v = _mm256_slli_epi32(v, n); return *this; } AvxInt32& operator>>=(int n) { v = _mm256_srai_epi32(v, n); return *this; } // Arithmetic (non-member friends for symmetric implicit conversions). friend AvxInt32 operator+(AvxInt32 a, AvxInt32 b) { return _mm256_add_epi32(a.v, b.v); } friend AvxInt32 operator-(AvxInt32 a, AvxInt32 b) { return _mm256_sub_epi32(a.v, b.v); } friend AvxInt32 operator*(AvxInt32 a, AvxInt32 b) { return _mm256_mullo_epi32(a.v, b.v); } // Bitwise. friend AvxInt32 operator&(AvxInt32 a, AvxInt32 b) { return _mm256_and_si256(a.v, b.v); } friend AvxInt32 operator|(AvxInt32 a, AvxInt32 b) { return _mm256_or_si256(a.v, b.v); } friend AvxInt32 operator^(AvxInt32 a, AvxInt32 b) { return _mm256_xor_si256(a.v, b.v); } friend AvxInt32 operator~(AvxInt32 a) { return _mm256_xor_si256(a.v, _mm256_set1_epi32(-1)); } // Comparisons return AvxInt32 masks. friend AvxInt32 operator==(AvxInt32 a, AvxInt32 b) { return _mm256_cmpeq_epi32(a.v, b.v); } friend AvxInt32 operator!=(AvxInt32 a, AvxInt32 b) { return _mm256_xor_si256(_mm256_cmpeq_epi32(a.v, b.v), _mm256_set1_epi32(-1)); } friend AvxInt32 operator<(AvxInt32 a, AvxInt32 b) { return _mm256_cmpgt_epi32(b.v, a.v); // No _mm256_cmplt_epi32; use swapped cmpgt. } friend AvxInt32 operator>(AvxInt32 a, AvxInt32 b) { return _mm256_cmpgt_epi32(a.v, b.v); } friend AvxInt32 operator!(AvxInt32 a) { return _mm256_cmpeq_epi32(a.v, _mm256_setzero_si256()); } }; // 8-wide uint32 SIMD wrapper around __m256i. struct AvxUint32 { __m256i v; AvxUint32() = default; AvxUint32(__m256i vec) : v(vec) {} AvxUint32(uint32_t u) : v(_mm256_set1_epi32(static_cast(u))) {} // Reinterpret from AvxInt32 (no-op on __m256i). AvxUint32(AvxInt32 i) : v(i.v) {} // Implicit conversion back to raw intrinsic type. operator __m256i() const { return v; } AvxUint32& operator+=(AvxUint32 o) { v = _mm256_add_epi32(v, o.v); return *this; } AvxUint32& operator-=(AvxUint32 o) { v = _mm256_sub_epi32(v, o.v); return *this; } AvxUint32& operator*=(AvxUint32 o) { v = _mm256_mullo_epi32(v, o.v); return *this; } AvxUint32& operator&=(AvxUint32 o) { v = _mm256_and_si256(v, o.v); return *this; } AvxUint32& operator|=(AvxUint32 o) { v = _mm256_or_si256(v, o.v); return *this; } // Shifts. AvxUint32 operator<<(int n) const { return _mm256_slli_epi32(v, n); } AvxUint32 operator>>(int n) const { return _mm256_srli_epi32(v, n); // Logical shift right } AvxUint32& operator<<=(int n) { v = _mm256_slli_epi32(v, n); return *this; } AvxUint32& operator>>=(int n) { v = _mm256_srli_epi32(v, n); return *this; } // Arithmetic (non-member friends for symmetric implicit conversions). 
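// (Declaring the operators as non-member friends lets either operand convert
// implicitly, so e.g. both `u + 5u` and `5u + u` resolve through the
// AvxUint32(uint32_t) broadcast constructor.)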
friend AvxUint32 operator+(AvxUint32 a, AvxUint32 b) { return _mm256_add_epi32(a.v, b.v); } friend AvxUint32 operator-(AvxUint32 a, AvxUint32 b) { return _mm256_sub_epi32(a.v, b.v); } friend AvxUint32 operator*(AvxUint32 a, AvxUint32 b) { return _mm256_mullo_epi32(a.v, b.v); } // Bitwise. friend AvxUint32 operator&(AvxUint32 a, AvxUint32 b) { return _mm256_and_si256(a.v, b.v); } friend AvxUint32 operator|(AvxUint32 a, AvxUint32 b) { return _mm256_or_si256(a.v, b.v); } friend AvxUint32 operator^(AvxUint32 a, AvxUint32 b) { return _mm256_xor_si256(a.v, b.v); } friend AvxUint32 operator~(AvxUint32 a) { return _mm256_xor_si256(a.v, _mm256_set1_epi32(-1)); } // Unsigned comparisons need XOR with sign bit to convert to signed domain. friend AvxUint32 operator==(AvxUint32 a, AvxUint32 b) { return _mm256_cmpeq_epi32(a.v, b.v); } friend AvxUint32 operator!=(AvxUint32 a, AvxUint32 b) { return _mm256_xor_si256(_mm256_cmpeq_epi32(a.v, b.v), _mm256_set1_epi32(-1)); } friend AvxUint32 operator>(AvxUint32 a, AvxUint32 b) { __m256i bias = _mm256_set1_epi32(static_cast(0x80000000u)); return _mm256_cmpgt_epi32(_mm256_xor_si256(a.v, bias), _mm256_xor_si256(b.v, bias)); } friend AvxUint32 operator<(AvxUint32 a, AvxUint32 b) { return b > a; } friend AvxUint32 operator!(AvxUint32 a) { return _mm256_cmpeq_epi32(a.v, _mm256_setzero_si256()); } }; // AvxFloat ↔ AvxInt32 conversion. inline AvxFloat::AvxFloat(AvxInt32 i) : v(_mm256_cvtepi32_ps(i.v)) {} // AvxInt32 ↔ AvxUint32 reinterpret (no-op on __m256i). inline AvxInt32::AvxInt32(AvxUint32 u) : v(u.v) {} // Map raw __m256 to AvxFloat for SimdTypeFor. template <> struct SimdTypeFor<__m256> { using type = AvxFloat; }; // --- bit_cast specializations --- template <> inline AvxInt32 bit_cast(const AvxFloat& f) noexcept { return _mm256_castps_si256(f.v); } template <> inline AvxUint32 bit_cast(const AvxFloat& f) noexcept { return _mm256_castps_si256(f.v); } template <> inline AvxFloat bit_cast(const AvxInt32& i) noexcept { return _mm256_castsi256_ps(i.v); } template <> inline AvxFloat bit_cast(const AvxUint32& u) noexcept { return _mm256_castsi256_ps(u.v); } template <> inline AvxInt32 bit_cast(const AvxUint32& u) noexcept { return u.v; } template <> inline AvxUint32 bit_cast(const AvxInt32& i) noexcept { return i.v; } // --- FloatTraits --- template <> struct FloatTraits { using IntType = AvxInt32; using UintType = AvxUint32; using BoolType = AvxFloat; // Float comparison masks static constexpr uint32_t kOne = 0x3f800000; static constexpr float kMagic = 12582912.f; static constexpr bool kBoolIsMask = true; static DISPENSO_INLINE AvxFloat sqrt(AvxFloat x) { return _mm256_sqrt_ps(x.v); } static DISPENSO_INLINE AvxFloat rcp(AvxFloat x) { return _mm256_rcp_ps(x.v); } static DISPENSO_INLINE AvxFloat fma(AvxFloat a, AvxFloat b, AvxFloat c) { #if defined(__FMA__) return _mm256_fmadd_ps(a.v, b.v, c.v); #else return _mm256_add_ps(_mm256_mul_ps(a.v, b.v), c.v); #endif } // conditional: select x where mask is true, y where false. template static DISPENSO_INLINE Arg conditional(AvxFloat mask, Arg x, Arg y); template static DISPENSO_INLINE Arg conditional(AvxInt32 mask, Arg x, Arg y); template static DISPENSO_INLINE Arg apply(AvxFloat mask, Arg x); static DISPENSO_INLINE AvxFloat min(AvxFloat a, AvxFloat b) { return _mm256_min_ps(a.v, b.v); } static DISPENSO_INLINE AvxFloat max(AvxFloat a, AvxFloat b) { return _mm256_max_ps(a.v, b.v); } }; // conditional specializations. 
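// Usage sketch (illustrative, not part of the library): branchless per-lane
// select driven by a comparison mask, e.g. absolute value:
//   AvxFloat mask = (x < AvxFloat(0.0f));       // all-ones lanes where x < 0
//   AvxFloat ax   = FloatTraits<AvxFloat>::conditional(mask, -x, x);
// Lanes where the mask is all-ones take the first value, the rest the second.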
template <> inline AvxFloat FloatTraits::conditional(AvxFloat mask, AvxFloat x, AvxFloat y) { return _mm256_blendv_ps(y.v, x.v, mask.v); } template <> inline AvxInt32 FloatTraits::conditional(AvxFloat mask, AvxInt32 x, AvxInt32 y) { return _mm256_castps_si256( _mm256_blendv_ps(_mm256_castsi256_ps(y.v), _mm256_castsi256_ps(x.v), mask.v)); } template <> inline AvxUint32 FloatTraits::conditional(AvxFloat mask, AvxUint32 x, AvxUint32 y) { return _mm256_castps_si256( _mm256_blendv_ps(_mm256_castsi256_ps(y.v), _mm256_castsi256_ps(x.v), mask.v)); } template <> inline AvxFloat FloatTraits::conditional(AvxInt32 mask, AvxFloat x, AvxFloat y) { return _mm256_blendv_ps(y.v, x.v, _mm256_castsi256_ps(mask.v)); } template <> inline AvxInt32 FloatTraits::conditional(AvxInt32 mask, AvxInt32 x, AvxInt32 y) { return _mm256_castps_si256(_mm256_blendv_ps( _mm256_castsi256_ps(y.v), _mm256_castsi256_ps(x.v), _mm256_castsi256_ps(mask.v))); } template <> inline AvxUint32 FloatTraits::conditional(AvxInt32 mask, AvxUint32 x, AvxUint32 y) { return _mm256_castps_si256(_mm256_blendv_ps( _mm256_castsi256_ps(y.v), _mm256_castsi256_ps(x.v), _mm256_castsi256_ps(mask.v))); } // apply: mask & x (bitwise AND). template <> inline AvxFloat FloatTraits::apply(AvxFloat mask, AvxFloat x) { return _mm256_and_ps(mask.v, x.v); } template <> inline AvxInt32 FloatTraits::apply(AvxFloat mask, AvxInt32 x) { return _mm256_and_si256(_mm256_castps_si256(mask.v), x.v); } template <> inline AvxUint32 FloatTraits::apply(AvxFloat mask, AvxUint32 x) { return _mm256_and_si256(_mm256_castps_si256(mask.v), x.v); } template <> struct FloatTraits { using IntType = AvxInt32; }; template <> struct FloatTraits { using IntType = AvxUint32; }; // --- Util function overloads for AVX types --- DISPENSO_INLINE AvxFloat floor_small(AvxFloat x) { return _mm256_floor_ps(x.v); } DISPENSO_INLINE AvxInt32 convert_to_int_trunc(AvxFloat f) { return _mm256_cvttps_epi32(f.v); } DISPENSO_INLINE AvxInt32 convert_to_int_trunc_safe(AvxFloat f) { AvxInt32 fi = bit_cast(f); AvxInt32 norm = (fi & 0x7f800000) != 0x7f800000; return norm & AvxInt32(_mm256_cvttps_epi32(f.v)); } DISPENSO_INLINE AvxInt32 convert_to_int(AvxFloat f) { // _mm256_cvtps_epi32 uses round-to-nearest-even. // Mask non-normals to 0 to avoid undefined behavior. AvxInt32 fi = bit_cast(f); AvxInt32 norm = (fi & 0x7f800000) != 0x7f800000; return norm & AvxInt32(_mm256_cvtps_epi32(f.v)); } template <> DISPENSO_INLINE AvxFloat min(AvxFloat x, AvxFloat mn) { // Ordering: if x is NaN, result is mn (second operand). return _mm256_min_ps(x.v, mn.v); } template <> DISPENSO_INLINE AvxFloat clamp_allow_nan(AvxFloat x, AvxFloat mn, AvxFloat mx) { return _mm256_max_ps(mn.v, _mm256_min_ps(mx.v, x.v)); } template <> DISPENSO_INLINE AvxFloat clamp_no_nan(AvxFloat x, AvxFloat mn, AvxFloat mx) { return _mm256_max_ps(mn.v, _mm256_min_ps(x.v, mx.v)); } template <> DISPENSO_INLINE AvxFloat gather(const float* table, AvxInt32 index) { return _mm256_i32gather_ps(table, index.v, 4); } DISPENSO_INLINE AvxInt32 int_div_by_3(AvxInt32 i) { // Multiply each lane by 0x55555556 and take the high 32 bits. // Process even and odd lanes separately since _mm256_mul_epu32 only uses lanes 0,2,4,6. __m256i multiplier = _mm256_set1_epi32(0x55555556); // Even lanes (0, 2, 4, 6). __m256i even = _mm256_srli_epi64(_mm256_mul_epu32(i.v, multiplier), 32); // Odd lanes (1, 3, 5, 7): shift right by 32 bits to put odd lanes in even positions. 
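// (Why the constant works: 0x55555556 = ceil(2^32 / 3), so for any lane value
// i in [0, 2^31) the high 32 bits of i * 0x55555556 equal i / 3; the residual
// 2*i / (3 * 2^32) is too small to carry the quotient across the next integer
// boundary.)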
__m256i i_odd = _mm256_srli_epi64(i.v, 32); __m256i odd = _mm256_srli_epi64(_mm256_mul_epu32(i_odd, multiplier), 32); odd = _mm256_slli_epi64(odd, 32); // Blend at 32-bit granularity: even indices from even, odd from odd. return _mm256_blend_epi32(even, odd, 0xAA); // 0xAA = 10101010b } // nonnormal/nonnormalOrZero: return AvxInt32 masks for AVX types. DISPENSO_INLINE AvxInt32 nonnormal(AvxInt32 i) { return (i & 0x7f800000) == 0x7f800000; } DISPENSO_INLINE AvxInt32 nonnormalOrZero(AvxInt32 i) { auto m = i & 0x7f800000; return (m == 0x7f800000) | (m == 0); } DISPENSO_INLINE AvxInt32 nonnormal(AvxFloat f) { return nonnormal(bit_cast(f)); } // any_true: reduce SIMD mask to scalar bool (true if any lane is set). DISPENSO_INLINE bool any_true(AvxInt32 mask) { return _mm256_movemask_ps(_mm256_castsi256_ps(mask.v)) != 0; } DISPENSO_INLINE AvxFloat signof(AvxFloat x) { AvxUint32 xi = bit_cast(x); return bit_cast((xi & 0x80000000u) | FloatTraits::kOne); } DISPENSO_INLINE AvxInt32 signofi(AvxInt32 i) { return AvxInt32(1) - (AvxInt32(2) & (i < AvxInt32(0))); } // nbool_as_one: 0 if mask is true (all-ones), 1 if false (all-zeros). template <> DISPENSO_INLINE AvxFloat nbool_as_one(AvxFloat b) { // ~mask & 1.0f bits return bit_cast( AvxInt32(_mm256_andnot_si256(_mm256_castps_si256(b.v), _mm256_set1_epi32(0x3f800000)))); } template <> DISPENSO_INLINE AvxInt32 nbool_as_one(AvxFloat b) { return _mm256_andnot_si256(_mm256_castps_si256(b.v), _mm256_set1_epi32(1)); } template <> DISPENSO_INLINE AvxInt32 nbool_as_one(AvxInt32 b) { return _mm256_andnot_si256(b.v, _mm256_set1_epi32(1)); } // bool_as_one: 1 if mask is true, 0 if false. template <> DISPENSO_INLINE AvxFloat bool_as_one(AvxFloat b) { return bit_cast( AvxInt32(_mm256_and_si256(_mm256_castps_si256(b.v), _mm256_set1_epi32(0x3f800000)))); } template <> DISPENSO_INLINE AvxInt32 bool_as_one(AvxFloat b) { return _mm256_and_si256(_mm256_castps_si256(b.v), _mm256_set1_epi32(1)); } // bool_as_mask: for SIMD masks, identity (already a mask). template <> DISPENSO_INLINE AvxInt32 bool_as_mask(AvxFloat b) { return _mm256_castps_si256(b.v); } template <> DISPENSO_INLINE AvxInt32 bool_as_mask(AvxInt32 b) { return b; } template <> DISPENSO_INLINE AvxInt32 bool_as_mask(AvxUint32 b) { return b.v; } template <> DISPENSO_INLINE AvxUint32 bool_as_mask(AvxFloat b) { return _mm256_castps_si256(b.v); } template <> DISPENSO_INLINE AvxUint32 bool_as_mask(AvxUint32 b) { return b; } template <> DISPENSO_INLINE AvxUint32 bool_as_mask(AvxInt32 b) { return b.v; } } // namespace fast_math } // namespace dispenso #endif // defined(__AVX2__) ================================================ FILE: dispenso/fast_math/float_traits_avx512.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #if defined(__AVX512F__) #include #include #include "float_traits.h" #include "util.h" namespace dispenso { namespace fast_math { struct Avx512Int32; struct Avx512Uint32; // 16-bit mask wrapping __mmask16 for AVX-512 predicated operations. // Supports comparison with int literals (mask == 0 → all-false check), // logical ops, and implicit conversion to Avx512Int32 for template code // that assigns BoolType to IntType. struct Avx512Mask { __mmask16 m; Avx512Mask() = default; Avx512Mask(__mmask16 mask) : m(mask) {} // Construct from int literal: 0 → all-false, non-zero → all-true. 
// Used by template code like `(expr) == 0` where expr is BoolType. Avx512Mask(int val) : m(val ? 0xFFFF : 0) {} // Logical NOT. friend Avx512Mask operator!(Avx512Mask a) { return static_cast<__mmask16>(~a.m); } // Logical AND/OR/XOR. friend Avx512Mask operator&(Avx512Mask a, Avx512Mask b) { return static_cast<__mmask16>(a.m & b.m); } friend Avx512Mask operator|(Avx512Mask a, Avx512Mask b) { return static_cast<__mmask16>(a.m | b.m); } friend Avx512Mask operator^(Avx512Mask a, Avx512Mask b) { return static_cast<__mmask16>(a.m ^ b.m); } Avx512Mask& operator&=(Avx512Mask o) { m &= o.m; return *this; } Avx512Mask& operator|=(Avx512Mask o) { m |= o.m; return *this; } // Equality: per-bit XNOR. mask == Avx512Mask(0) is equivalent to !mask. friend Avx512Mask operator==(Avx512Mask a, Avx512Mask b) { return static_cast<__mmask16>(~(a.m ^ b.m)); } friend Avx512Mask operator!=(Avx512Mask a, Avx512Mask b) { return static_cast<__mmask16>(a.m ^ b.m); } // Implicit conversion to Avx512Int32 (lane-wide mask: all-ones or all-zeros). // Used by template code that assigns BoolType to IntType_t. inline operator Avx512Int32() const; }; // 16-wide float SIMD wrapper around __m512. struct Avx512Float { __m512 v; Avx512Float() = default; Avx512Float(__m512 vec) : v(vec) {} // Implicit broadcast from scalar — required for polynomial constant propagation. Avx512Float(float f) : v(_mm512_set1_ps(f)) {} // Explicit int→float conversion for (Flt)intExpr patterns. explicit Avx512Float(Avx512Int32 i); // Implicit conversion back to raw intrinsic type. operator __m512() const { return v; } Avx512Float operator-() const { // _mm512_xor_ps requires AVX-512 DQ; use integer XOR on sign bit instead. return _mm512_castsi512_ps( _mm512_xor_si512(_mm512_castps_si512(v), _mm512_set1_epi32(int32_t(0x80000000)))); } Avx512Float& operator+=(Avx512Float o) { v = _mm512_add_ps(v, o.v); return *this; } Avx512Float& operator-=(Avx512Float o) { v = _mm512_sub_ps(v, o.v); return *this; } Avx512Float& operator*=(Avx512Float o) { v = _mm512_mul_ps(v, o.v); return *this; } Avx512Float& operator&=(Avx512Float o) { // _mm512_and_ps requires AVX-512 DQ; use integer AND instead. v = _mm512_castsi512_ps(_mm512_and_si512(_mm512_castps_si512(v), _mm512_castps_si512(o.v))); return *this; } // Arithmetic (non-member friends for symmetric implicit conversions). friend Avx512Float operator+(Avx512Float a, Avx512Float b) { return _mm512_add_ps(a.v, b.v); } friend Avx512Float operator-(Avx512Float a, Avx512Float b) { return _mm512_sub_ps(a.v, b.v); } friend Avx512Float operator*(Avx512Float a, Avx512Float b) { return _mm512_mul_ps(a.v, b.v); } friend Avx512Float operator/(Avx512Float a, Avx512Float b) { return _mm512_div_ps(a.v, b.v); } // Comparisons return Avx512Mask (native __mmask16). friend Avx512Mask operator<(Avx512Float a, Avx512Float b) { return _mm512_cmp_ps_mask(a.v, b.v, _CMP_LT_OQ); } friend Avx512Mask operator>(Avx512Float a, Avx512Float b) { return _mm512_cmp_ps_mask(a.v, b.v, _CMP_GT_OQ); } friend Avx512Mask operator<=(Avx512Float a, Avx512Float b) { return _mm512_cmp_ps_mask(a.v, b.v, _CMP_LE_OQ); } friend Avx512Mask operator>=(Avx512Float a, Avx512Float b) { return _mm512_cmp_ps_mask(a.v, b.v, _CMP_GE_OQ); } friend Avx512Mask operator==(Avx512Float a, Avx512Float b) { return _mm512_cmp_ps_mask(a.v, b.v, _CMP_EQ_OQ); } friend Avx512Mask operator!=(Avx512Float a, Avx512Float b) { return _mm512_cmp_ps_mask(a.v, b.v, _CMP_NEQ_UQ); } // Bitwise ops on float values (for sign manipulation, etc.). 
// _mm512_{and,or,xor}_ps require AVX-512 DQ; use integer ops + casts. friend Avx512Float operator&(Avx512Float a, Avx512Float b) { return _mm512_castsi512_ps( _mm512_and_si512(_mm512_castps_si512(a.v), _mm512_castps_si512(b.v))); } friend Avx512Float operator|(Avx512Float a, Avx512Float b) { return _mm512_castsi512_ps(_mm512_or_si512(_mm512_castps_si512(a.v), _mm512_castps_si512(b.v))); } friend Avx512Float operator^(Avx512Float a, Avx512Float b) { return _mm512_castsi512_ps( _mm512_xor_si512(_mm512_castps_si512(a.v), _mm512_castps_si512(b.v))); } }; // 16-wide int32 SIMD wrapper around __m512i. struct Avx512Int32 { __m512i v; Avx512Int32() = default; Avx512Int32(__m512i vec) : v(vec) {} Avx512Int32(int32_t i) : v(_mm512_set1_epi32(i)) {} // Reinterpret from Avx512Uint32 (no-op on __m512i). Avx512Int32(Avx512Uint32 u); // Implicit conversion back to raw intrinsic type. operator __m512i() const { return v; } Avx512Int32 operator-() const { return _mm512_sub_epi32(_mm512_setzero_si512(), v); } Avx512Int32& operator+=(Avx512Int32 o) { v = _mm512_add_epi32(v, o.v); return *this; } Avx512Int32& operator-=(Avx512Int32 o) { v = _mm512_sub_epi32(v, o.v); return *this; } Avx512Int32& operator*=(Avx512Int32 o) { v = _mm512_mullo_epi32(v, o.v); return *this; } Avx512Int32& operator&=(Avx512Int32 o) { v = _mm512_and_si512(v, o.v); return *this; } Avx512Int32& operator|=(Avx512Int32 o) { v = _mm512_or_si512(v, o.v); return *this; } // Shifts. Avx512Int32 operator<<(int n) const { return _mm512_slli_epi32(v, n); } Avx512Int32 operator>>(int n) const { return _mm512_srai_epi32(v, n); // Arithmetic shift right } Avx512Int32& operator<<=(int n) { v = _mm512_slli_epi32(v, n); return *this; } Avx512Int32& operator>>=(int n) { v = _mm512_srai_epi32(v, n); return *this; } // Arithmetic (non-member friends for symmetric implicit conversions). friend Avx512Int32 operator+(Avx512Int32 a, Avx512Int32 b) { return _mm512_add_epi32(a.v, b.v); } friend Avx512Int32 operator-(Avx512Int32 a, Avx512Int32 b) { return _mm512_sub_epi32(a.v, b.v); } friend Avx512Int32 operator*(Avx512Int32 a, Avx512Int32 b) { return _mm512_mullo_epi32(a.v, b.v); } // Bitwise. friend Avx512Int32 operator&(Avx512Int32 a, Avx512Int32 b) { return _mm512_and_si512(a.v, b.v); } friend Avx512Int32 operator|(Avx512Int32 a, Avx512Int32 b) { return _mm512_or_si512(a.v, b.v); } friend Avx512Int32 operator^(Avx512Int32 a, Avx512Int32 b) { return _mm512_xor_si512(a.v, b.v); } friend Avx512Int32 operator~(Avx512Int32 a) { return _mm512_xor_si512(a.v, _mm512_set1_epi32(-1)); } // Comparisons return Avx512Mask (native __mmask16). friend Avx512Mask operator==(Avx512Int32 a, Avx512Int32 b) { return _mm512_cmpeq_epi32_mask(a.v, b.v); } friend Avx512Mask operator!=(Avx512Int32 a, Avx512Int32 b) { return static_cast<__mmask16>(~_mm512_cmpeq_epi32_mask(a.v, b.v)); } friend Avx512Mask operator<(Avx512Int32 a, Avx512Int32 b) { return _mm512_cmplt_epi32_mask(a.v, b.v); } friend Avx512Mask operator>(Avx512Int32 a, Avx512Int32 b) { return _mm512_cmpgt_epi32_mask(a.v, b.v); } friend Avx512Mask operator!(Avx512Int32 a) { return _mm512_cmpeq_epi32_mask(a.v, _mm512_setzero_si512()); } }; // 16-wide uint32 SIMD wrapper around __m512i. struct Avx512Uint32 { __m512i v; Avx512Uint32() = default; Avx512Uint32(__m512i vec) : v(vec) {} Avx512Uint32(uint32_t u) : v(_mm512_set1_epi32(static_cast(u))) {} // Reinterpret from Avx512Int32 (no-op on __m512i). Avx512Uint32(Avx512Int32 i) : v(i.v) {} // Implicit conversion back to raw intrinsic type. 
operator __m512i() const { return v; } Avx512Uint32& operator+=(Avx512Uint32 o) { v = _mm512_add_epi32(v, o.v); return *this; } Avx512Uint32& operator-=(Avx512Uint32 o) { v = _mm512_sub_epi32(v, o.v); return *this; } Avx512Uint32& operator*=(Avx512Uint32 o) { v = _mm512_mullo_epi32(v, o.v); return *this; } Avx512Uint32& operator&=(Avx512Uint32 o) { v = _mm512_and_si512(v, o.v); return *this; } Avx512Uint32& operator|=(Avx512Uint32 o) { v = _mm512_or_si512(v, o.v); return *this; } // Shifts. Avx512Uint32 operator<<(int n) const { return _mm512_slli_epi32(v, n); } Avx512Uint32 operator>>(int n) const { return _mm512_srli_epi32(v, n); // Logical shift right } Avx512Uint32& operator<<=(int n) { v = _mm512_slli_epi32(v, n); return *this; } Avx512Uint32& operator>>=(int n) { v = _mm512_srli_epi32(v, n); return *this; } // Arithmetic (non-member friends for symmetric implicit conversions). friend Avx512Uint32 operator+(Avx512Uint32 a, Avx512Uint32 b) { return _mm512_add_epi32(a.v, b.v); } friend Avx512Uint32 operator-(Avx512Uint32 a, Avx512Uint32 b) { return _mm512_sub_epi32(a.v, b.v); } friend Avx512Uint32 operator*(Avx512Uint32 a, Avx512Uint32 b) { return _mm512_mullo_epi32(a.v, b.v); } // Bitwise. friend Avx512Uint32 operator&(Avx512Uint32 a, Avx512Uint32 b) { return _mm512_and_si512(a.v, b.v); } friend Avx512Uint32 operator|(Avx512Uint32 a, Avx512Uint32 b) { return _mm512_or_si512(a.v, b.v); } friend Avx512Uint32 operator^(Avx512Uint32 a, Avx512Uint32 b) { return _mm512_xor_si512(a.v, b.v); } friend Avx512Uint32 operator~(Avx512Uint32 a) { return _mm512_xor_si512(a.v, _mm512_set1_epi32(-1)); } // AVX-512 has native unsigned comparisons. friend Avx512Mask operator==(Avx512Uint32 a, Avx512Uint32 b) { return _mm512_cmpeq_epi32_mask(a.v, b.v); } friend Avx512Mask operator!=(Avx512Uint32 a, Avx512Uint32 b) { return static_cast<__mmask16>(~_mm512_cmpeq_epi32_mask(a.v, b.v)); } friend Avx512Mask operator>(Avx512Uint32 a, Avx512Uint32 b) { return _mm512_cmpgt_epu32_mask(a.v, b.v); } friend Avx512Mask operator<(Avx512Uint32 a, Avx512Uint32 b) { return _mm512_cmplt_epu32_mask(a.v, b.v); } friend Avx512Mask operator!(Avx512Uint32 a) { return _mm512_cmpeq_epi32_mask(a.v, _mm512_setzero_si512()); } }; // --- Deferred inline definitions --- // Avx512Mask → Avx512Int32: expand to lane-wide mask (all-ones or all-zeros). inline Avx512Mask::operator Avx512Int32() const { return _mm512_maskz_set1_epi32(m, -1); } // Avx512Float ↔ Avx512Int32 conversion. inline Avx512Float::Avx512Float(Avx512Int32 i) : v(_mm512_cvtepi32_ps(i.v)) {} // Avx512Int32 ↔ Avx512Uint32 reinterpret (no-op on __m512i). inline Avx512Int32::Avx512Int32(Avx512Uint32 u) : v(u.v) {} // Map raw __m512 to Avx512Float for SimdTypeFor. 
template <> struct SimdTypeFor<__m512> { using type = Avx512Float; }; // --- bit_cast specializations --- template <> inline Avx512Int32 bit_cast(const Avx512Float& f) noexcept { return _mm512_castps_si512(f.v); } template <> inline Avx512Uint32 bit_cast(const Avx512Float& f) noexcept { return _mm512_castps_si512(f.v); } template <> inline Avx512Float bit_cast(const Avx512Int32& i) noexcept { return _mm512_castsi512_ps(i.v); } template <> inline Avx512Float bit_cast(const Avx512Uint32& u) noexcept { return _mm512_castsi512_ps(u.v); } template <> inline Avx512Int32 bit_cast(const Avx512Uint32& u) noexcept { return u.v; } template <> inline Avx512Uint32 bit_cast(const Avx512Int32& i) noexcept { return i.v; } // --- FloatTraits --- template <> struct FloatTraits { using IntType = Avx512Int32; using UintType = Avx512Uint32; using BoolType = Avx512Mask; static constexpr uint32_t kOne = 0x3f800000; static constexpr float kMagic = 12582912.f; static constexpr bool kBoolIsMask = true; static DISPENSO_INLINE Avx512Float sqrt(Avx512Float x) { return _mm512_sqrt_ps(x.v); } static DISPENSO_INLINE Avx512Float rcp(Avx512Float x) { return _mm512_rcp14_ps(x.v); } // AVX-512 always has FMA. static DISPENSO_INLINE Avx512Float fma(Avx512Float a, Avx512Float b, Avx512Float c) { return _mm512_fmadd_ps(a.v, b.v, c.v); } // conditional: select x where mask is true, y where false. // Native __mmask16 versions — the fast path. template static DISPENSO_INLINE Arg conditional(Avx512Mask mask, Arg x, Arg y); // Lane-wide int32 mask versions — for compatibility with code using IntType masks. template static DISPENSO_INLINE Arg conditional(Avx512Int32 mask, Arg x, Arg y); // apply: zero out lanes where mask is false. template static DISPENSO_INLINE Arg apply(Avx512Mask mask, Arg x); static DISPENSO_INLINE Avx512Float min(Avx512Float a, Avx512Float b) { return _mm512_min_ps(a.v, b.v); } static DISPENSO_INLINE Avx512Float max(Avx512Float a, Avx512Float b) { return _mm512_max_ps(a.v, b.v); } }; // conditional specializations (Avx512Mask). template <> inline Avx512Float FloatTraits::conditional(Avx512Mask mask, Avx512Float x, Avx512Float y) { return _mm512_mask_blend_ps(mask.m, y.v, x.v); } template <> inline Avx512Int32 FloatTraits::conditional(Avx512Mask mask, Avx512Int32 x, Avx512Int32 y) { return _mm512_mask_blend_epi32(mask.m, y.v, x.v); } template <> inline Avx512Uint32 FloatTraits::conditional(Avx512Mask mask, Avx512Uint32 x, Avx512Uint32 y) { return _mm512_mask_blend_epi32(mask.m, y.v, x.v); } // conditional specializations (Avx512Int32 lane-wide mask). // Convert lane-wide mask to __mmask16 by checking sign bit (equivalent to movepi32_mask, // which requires AVX-512 DQ). template <> inline Avx512Float FloatTraits::conditional(Avx512Int32 mask, Avx512Float x, Avx512Float y) { __mmask16 m = _mm512_cmplt_epi32_mask(mask.v, _mm512_setzero_si512()); return _mm512_mask_blend_ps(m, y.v, x.v); } template <> inline Avx512Int32 FloatTraits::conditional(Avx512Int32 mask, Avx512Int32 x, Avx512Int32 y) { __mmask16 m = _mm512_cmplt_epi32_mask(mask.v, _mm512_setzero_si512()); return _mm512_mask_blend_epi32(m, y.v, x.v); } template <> inline Avx512Uint32 FloatTraits::conditional(Avx512Int32 mask, Avx512Uint32 x, Avx512Uint32 y) { __mmask16 m = _mm512_cmplt_epi32_mask(mask.v, _mm512_setzero_si512()); return _mm512_mask_blend_epi32(m, y.v, x.v); } // apply specializations (Avx512Mask): zero out lanes where mask is false. 
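// (Zero-masking: _mm512_maskz_mov_* copies the source lane where the mask bit
// is set and writes zero elsewhere, matching the scalar convention
// apply(b, x) == b * x. E.g. FloatTraits<Avx512Float>::apply(x > Avx512Float(0.0f), x)
// keeps the positive lanes of x and zeroes the rest.)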
template <> inline Avx512Float FloatTraits::apply(Avx512Mask mask, Avx512Float x) { return _mm512_maskz_mov_ps(mask.m, x.v); } template <> inline Avx512Int32 FloatTraits::apply(Avx512Mask mask, Avx512Int32 x) { return _mm512_maskz_mov_epi32(mask.m, x.v); } template <> inline Avx512Uint32 FloatTraits::apply(Avx512Mask mask, Avx512Uint32 x) { return _mm512_maskz_mov_epi32(mask.m, x.v); } template <> struct FloatTraits { using IntType = Avx512Int32; }; template <> struct FloatTraits { using IntType = Avx512Uint32; }; // --- Util function overloads for AVX-512 types --- DISPENSO_INLINE Avx512Float floor_small(Avx512Float x) { return _mm512_floor_ps(x.v); } DISPENSO_INLINE Avx512Int32 convert_to_int_trunc(Avx512Float f) { return _mm512_cvttps_epi32(f.v); } DISPENSO_INLINE Avx512Int32 convert_to_int_trunc_safe(Avx512Float f) { Avx512Int32 fi = bit_cast(f); __mmask16 norm = static_cast<__mmask16>(~_mm512_cmpeq_epi32_mask( _mm512_and_si512(fi.v, _mm512_set1_epi32(0x7f800000)), _mm512_set1_epi32(0x7f800000))); return _mm512_maskz_cvttps_epi32(norm, f.v); } DISPENSO_INLINE Avx512Int32 convert_to_int(Avx512Float f) { // _mm512_cvtps_epi32 uses round-to-nearest-even. // Use maskz to zero non-normal lanes and avoid undefined behavior. Avx512Int32 fi = bit_cast(f); __mmask16 norm = static_cast<__mmask16>(~_mm512_cmpeq_epi32_mask( _mm512_and_si512(fi.v, _mm512_set1_epi32(0x7f800000)), _mm512_set1_epi32(0x7f800000))); return _mm512_maskz_cvtps_epi32(norm, f.v); } template <> DISPENSO_INLINE Avx512Float min(Avx512Float x, Avx512Float mn) { // Ordering: if x is NaN, result is mn (second operand). return _mm512_min_ps(x.v, mn.v); } template <> DISPENSO_INLINE Avx512Float clamp_allow_nan(Avx512Float x, Avx512Float mn, Avx512Float mx) { return _mm512_max_ps(mn.v, _mm512_min_ps(mx.v, x.v)); } template <> DISPENSO_INLINE Avx512Float clamp_no_nan(Avx512Float x, Avx512Float mn, Avx512Float mx) { return _mm512_max_ps(mn.v, _mm512_min_ps(x.v, mx.v)); } template <> DISPENSO_INLINE Avx512Float gather(const float* table, Avx512Int32 index) { // Note: AVX-512 gather has index as first arg, base as second. return _mm512_i32gather_ps(index.v, table, 4); } DISPENSO_INLINE Avx512Int32 int_div_by_3(Avx512Int32 i) { // Multiply each lane by 0x55555556 and take the high 32 bits. // Process even and odd lanes separately since _mm512_mul_epu32 only uses lanes 0,2,... __m512i multiplier = _mm512_set1_epi32(0x55555556); // Even lanes (0, 2, 4, ..., 14). __m512i even = _mm512_srli_epi64(_mm512_mul_epu32(i.v, multiplier), 32); // Odd lanes (1, 3, 5, ..., 15): shift right by 32 to put odd in even positions. __m512i i_odd = _mm512_srli_epi64(i.v, 32); __m512i odd = _mm512_srli_epi64(_mm512_mul_epu32(i_odd, multiplier), 32); odd = _mm512_slli_epi64(odd, 32); // Blend at 32-bit granularity: even indices from even, odd from odd. return _mm512_mask_blend_epi32(0xAAAA, even, odd); // 0xAAAA = odd lanes from 'odd' } // nonnormal/nonnormalOrZero: return Avx512Mask for AVX-512 types. 
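// (Here "nonnormal" flags lanes whose exponent field is all ones, i.e. inf or
// NaN; nonnormalOrZero additionally flags lanes whose exponent field is zero,
// i.e. ±0 and subnormals.)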
DISPENSO_INLINE Avx512Mask nonnormal(Avx512Int32 i) { return _mm512_cmpeq_epi32_mask( _mm512_and_si512(i.v, _mm512_set1_epi32(0x7f800000)), _mm512_set1_epi32(0x7f800000)); } DISPENSO_INLINE Avx512Mask nonnormalOrZero(Avx512Int32 i) { __m512i masked = _mm512_and_si512(i.v, _mm512_set1_epi32(0x7f800000)); __mmask16 isInfNan = _mm512_cmpeq_epi32_mask(masked, _mm512_set1_epi32(0x7f800000)); __mmask16 isZero = _mm512_cmpeq_epi32_mask(masked, _mm512_setzero_si512()); return static_cast<__mmask16>(isInfNan | isZero); } DISPENSO_INLINE Avx512Mask nonnormal(Avx512Float f) { return nonnormal(bit_cast(f)); } // any_true: reduce mask to scalar bool (Avx512Mask wraps __mmask16). DISPENSO_INLINE bool any_true(Avx512Mask mask) { return mask.m != 0; } DISPENSO_INLINE Avx512Float signof(Avx512Float x) { Avx512Uint32 xi = bit_cast(x); return bit_cast((xi & 0x80000000u) | FloatTraits::kOne); } DISPENSO_INLINE Avx512Int32 signofi(Avx512Int32 i) { // Use mask blend: +1 for i >= 0, -1 for i < 0. __mmask16 neg = _mm512_cmplt_epi32_mask(i.v, _mm512_setzero_si512()); return _mm512_mask_blend_epi32(neg, _mm512_set1_epi32(1), _mm512_set1_epi32(-1)); } // nbool_as_one: 0 if mask is true, 1 if false. template <> DISPENSO_INLINE Avx512Float nbool_as_one(Avx512Mask b) { // Where mask is false (0 in mask bit), load 1.0f; where true, load 0. return _mm512_maskz_mov_ps(static_cast<__mmask16>(~b.m), _mm512_set1_ps(1.0f)); } template <> DISPENSO_INLINE Avx512Int32 nbool_as_one(Avx512Mask b) { return _mm512_maskz_set1_epi32(static_cast<__mmask16>(~b.m), 1); } template <> DISPENSO_INLINE Avx512Int32 nbool_as_one(Avx512Int32 b) { // Lane-wide mask: non-zero → "true" → return 0; zero → "false" → return 1. __mmask16 isZero = _mm512_cmpeq_epi32_mask(b.v, _mm512_setzero_si512()); return _mm512_maskz_set1_epi32(isZero, 1); } // bool_as_one: 1 if mask is true, 0 if false. template <> DISPENSO_INLINE Avx512Float bool_as_one(Avx512Mask b) { return _mm512_maskz_mov_ps(b.m, _mm512_set1_ps(1.0f)); } template <> DISPENSO_INLINE Avx512Int32 bool_as_one(Avx512Mask b) { return _mm512_maskz_set1_epi32(b.m, 1); } // bool_as_mask: convert Avx512Mask to lane-wide Avx512Int32 mask. template <> DISPENSO_INLINE Avx512Int32 bool_as_mask(Avx512Mask b) { return _mm512_maskz_set1_epi32(b.m, -1); } template <> DISPENSO_INLINE Avx512Int32 bool_as_mask(Avx512Int32 b) { return b; } template <> DISPENSO_INLINE Avx512Int32 bool_as_mask(Avx512Uint32 b) { return b.v; } template <> DISPENSO_INLINE Avx512Uint32 bool_as_mask(Avx512Mask b) { return _mm512_maskz_set1_epi32(b.m, -1); } template <> DISPENSO_INLINE Avx512Uint32 bool_as_mask(Avx512Uint32 b) { return b; } template <> DISPENSO_INLINE Avx512Uint32 bool_as_mask(Avx512Int32 b) { return b.v; } // bool_apply_or_zero: specialized for Avx512Mask to use native masked operations. template <> DISPENSO_INLINE Avx512Float bool_apply_or_zero(Avx512Mask b, Avx512Float val) { return _mm512_maskz_mov_ps(b.m, val.v); } template <> DISPENSO_INLINE Avx512Int32 bool_apply_or_zero(Avx512Mask b, Avx512Int32 val) { return _mm512_maskz_mov_epi32(b.m, val.v); } template <> DISPENSO_INLINE Avx512Uint32 bool_apply_or_zero(Avx512Mask b, Avx512Uint32 val) { return _mm512_maskz_mov_epi32(b.m, val.v); } } // namespace fast_math } // namespace dispenso #endif // defined(__AVX512F__) ================================================ FILE: dispenso/fast_math/float_traits_hwy.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. 
* * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once // Only include when Highway is available on the include path. // Consumers must depend on the Highway library target. #if __has_include("hwy/highway.h") #include "hwy/highway.h" #include #include "float_traits.h" #include "util.h" namespace dispenso { namespace fast_math { namespace hn = hwy::HWY_NAMESPACE; using HwyFloatTag = hn::ScalableTag; using HwyInt32Tag = hn::RebindToSigned; using HwyUint32Tag = hn::RebindToUnsigned; struct HwyInt32; struct HwyUint32; // N-wide float SIMD wrapper around Highway Vec>. // Width is determined at compile time by Highway's static dispatch. struct HwyFloat { hn::Vec v; HwyFloat() = default; HwyFloat(hn::Vec vec) : v(vec) {} // Implicit broadcast from scalar — required for polynomial constant propagation. HwyFloat(float f) : v(hn::Set(HwyFloatTag{}, f)) {} // Explicit int→float conversion for (Flt)intExpr patterns. explicit HwyFloat(HwyInt32 i); HwyFloat operator-() const { return hn::Neg(v); } HwyFloat& operator+=(HwyFloat o) { v = hn::Add(v, o.v); return *this; } HwyFloat& operator-=(HwyFloat o) { v = hn::Sub(v, o.v); return *this; } HwyFloat& operator*=(HwyFloat o) { v = hn::Mul(v, o.v); return *this; } HwyFloat& operator&=(HwyFloat o) { const HwyInt32Tag di; v = hn::BitCast(HwyFloatTag{}, hn::And(hn::BitCast(di, v), hn::BitCast(di, o.v))); return *this; } // Arithmetic (non-member friends for symmetric implicit conversions). friend HwyFloat operator+(HwyFloat a, HwyFloat b) { return hn::Add(a.v, b.v); } friend HwyFloat operator-(HwyFloat a, HwyFloat b) { return hn::Sub(a.v, b.v); } friend HwyFloat operator*(HwyFloat a, HwyFloat b) { return hn::Mul(a.v, b.v); } friend HwyFloat operator/(HwyFloat a, HwyFloat b) { return hn::Div(a.v, b.v); } // Comparisons return HwyFloat masks (all-ones or all-zeros per lane). friend HwyFloat operator<(HwyFloat a, HwyFloat b) { const HwyFloatTag d; return hn::VecFromMask(d, hn::Lt(a.v, b.v)); } friend HwyFloat operator>(HwyFloat a, HwyFloat b) { const HwyFloatTag d; return hn::VecFromMask(d, hn::Gt(a.v, b.v)); } friend HwyFloat operator<=(HwyFloat a, HwyFloat b) { const HwyFloatTag d; return hn::VecFromMask(d, hn::Le(a.v, b.v)); } friend HwyFloat operator>=(HwyFloat a, HwyFloat b) { const HwyFloatTag d; return hn::VecFromMask(d, hn::Ge(a.v, b.v)); } friend HwyFloat operator==(HwyFloat a, HwyFloat b) { const HwyFloatTag d; return hn::VecFromMask(d, hn::Eq(a.v, b.v)); } friend HwyFloat operator!=(HwyFloat a, HwyFloat b) { const HwyFloatTag d; return hn::VecFromMask(d, hn::Ne(a.v, b.v)); } // Logical NOT of a comparison mask. friend HwyFloat operator!(HwyFloat a) { const HwyFloatTag d; const HwyInt32Tag di; return hn::BitCast(d, hn::Xor(hn::BitCast(di, a.v), hn::Set(di, -1))); } // Bitwise ops on float masks. friend HwyFloat operator&(HwyFloat a, HwyFloat b) { const HwyFloatTag d; const HwyInt32Tag di; return hn::BitCast(d, hn::And(hn::BitCast(di, a.v), hn::BitCast(di, b.v))); } friend HwyFloat operator|(HwyFloat a, HwyFloat b) { const HwyFloatTag d; const HwyInt32Tag di; return hn::BitCast(d, hn::Or(hn::BitCast(di, a.v), hn::BitCast(di, b.v))); } friend HwyFloat operator^(HwyFloat a, HwyFloat b) { const HwyFloatTag d; const HwyInt32Tag di; return hn::BitCast(d, hn::Xor(hn::BitCast(di, a.v), hn::BitCast(di, b.v))); } }; // N-wide int32 SIMD wrapper around Highway Vec>>. 
struct HwyInt32 { hn::Vec v; HwyInt32() = default; HwyInt32(hn::Vec vec) : v(vec) {} HwyInt32(int32_t i) : v(hn::Set(HwyInt32Tag{}, i)) {} // Reinterpret from HwyUint32 (no-op). HwyInt32(HwyUint32 u); HwyInt32 operator-() const { return hn::Neg(v); } HwyInt32& operator+=(HwyInt32 o) { v = hn::Add(v, o.v); return *this; } HwyInt32& operator-=(HwyInt32 o) { v = hn::Sub(v, o.v); return *this; } HwyInt32& operator*=(HwyInt32 o) { v = hn::Mul(v, o.v); return *this; } HwyInt32& operator&=(HwyInt32 o) { v = hn::And(v, o.v); return *this; } HwyInt32& operator|=(HwyInt32 o) { v = hn::Or(v, o.v); return *this; } // Shifts. HwyInt32 operator<<(int n) const { const HwyInt32Tag di; return hn::Shl(v, hn::BitCast(di, hn::Set(HwyUint32Tag{}, static_cast(n)))); } HwyInt32 operator>>(int n) const { const HwyInt32Tag di; return hn::Shr(v, hn::BitCast(di, hn::Set(HwyUint32Tag{}, static_cast(n)))); } HwyInt32& operator<<=(int n) { const HwyInt32Tag di; v = hn::Shl(v, hn::BitCast(di, hn::Set(HwyUint32Tag{}, static_cast(n)))); return *this; } HwyInt32& operator>>=(int n) { const HwyInt32Tag di; v = hn::Shr(v, hn::BitCast(di, hn::Set(HwyUint32Tag{}, static_cast(n)))); return *this; } // Arithmetic (non-member friends for symmetric implicit conversions). friend HwyInt32 operator+(HwyInt32 a, HwyInt32 b) { return hn::Add(a.v, b.v); } friend HwyInt32 operator-(HwyInt32 a, HwyInt32 b) { return hn::Sub(a.v, b.v); } friend HwyInt32 operator*(HwyInt32 a, HwyInt32 b) { return hn::Mul(a.v, b.v); } // Bitwise. friend HwyInt32 operator&(HwyInt32 a, HwyInt32 b) { return hn::And(a.v, b.v); } friend HwyInt32 operator|(HwyInt32 a, HwyInt32 b) { return hn::Or(a.v, b.v); } friend HwyInt32 operator^(HwyInt32 a, HwyInt32 b) { return hn::Xor(a.v, b.v); } friend HwyInt32 operator~(HwyInt32 a) { return hn::Not(a.v); } // Comparisons return HwyInt32 masks (via VecFromMask). friend HwyInt32 operator==(HwyInt32 a, HwyInt32 b) { const HwyInt32Tag di; return hn::VecFromMask(di, hn::Eq(a.v, b.v)); } friend HwyInt32 operator!=(HwyInt32 a, HwyInt32 b) { const HwyInt32Tag di; return hn::VecFromMask(di, hn::Not(hn::Eq(a.v, b.v))); } friend HwyInt32 operator<(HwyInt32 a, HwyInt32 b) { const HwyInt32Tag di; return hn::VecFromMask(di, hn::Lt(a.v, b.v)); } friend HwyInt32 operator>(HwyInt32 a, HwyInt32 b) { const HwyInt32Tag di; return hn::VecFromMask(di, hn::Gt(a.v, b.v)); } friend HwyInt32 operator!(HwyInt32 a) { const HwyInt32Tag di; return hn::VecFromMask(di, hn::Eq(a.v, hn::Zero(di))); } }; // N-wide uint32 SIMD wrapper around Highway Vec>>. struct HwyUint32 { hn::Vec v; HwyUint32() = default; HwyUint32(hn::Vec vec) : v(vec) {} HwyUint32(uint32_t u) : v(hn::Set(HwyUint32Tag{}, u)) {} // Reinterpret from HwyInt32 (no-op). HwyUint32(HwyInt32 i) : v(hn::BitCast(HwyUint32Tag{}, i.v)) {} HwyUint32& operator+=(HwyUint32 o) { v = hn::Add(v, o.v); return *this; } HwyUint32& operator-=(HwyUint32 o) { v = hn::Sub(v, o.v); return *this; } HwyUint32& operator*=(HwyUint32 o) { v = hn::Mul(v, o.v); return *this; } HwyUint32& operator&=(HwyUint32 o) { v = hn::And(v, o.v); return *this; } HwyUint32& operator|=(HwyUint32 o) { v = hn::Or(v, o.v); return *this; } // Shifts. 
HwyUint32 operator<<(int n) const { return hn::Shl(v, hn::Set(HwyUint32Tag{}, static_cast(n))); } HwyUint32 operator>>(int n) const { return hn::Shr(v, hn::Set(HwyUint32Tag{}, static_cast(n))); } HwyUint32& operator<<=(int n) { v = hn::Shl(v, hn::Set(HwyUint32Tag{}, static_cast(n))); return *this; } HwyUint32& operator>>=(int n) { v = hn::Shr(v, hn::Set(HwyUint32Tag{}, static_cast(n))); return *this; } // Arithmetic (non-member friends for symmetric implicit conversions). friend HwyUint32 operator+(HwyUint32 a, HwyUint32 b) { return hn::Add(a.v, b.v); } friend HwyUint32 operator-(HwyUint32 a, HwyUint32 b) { return hn::Sub(a.v, b.v); } friend HwyUint32 operator*(HwyUint32 a, HwyUint32 b) { return hn::Mul(a.v, b.v); } // Bitwise. friend HwyUint32 operator&(HwyUint32 a, HwyUint32 b) { return hn::And(a.v, b.v); } friend HwyUint32 operator|(HwyUint32 a, HwyUint32 b) { return hn::Or(a.v, b.v); } friend HwyUint32 operator^(HwyUint32 a, HwyUint32 b) { return hn::Xor(a.v, b.v); } friend HwyUint32 operator~(HwyUint32 a) { return hn::Not(a.v); } // Comparisons. friend HwyUint32 operator==(HwyUint32 a, HwyUint32 b) { const HwyUint32Tag du; return hn::VecFromMask(du, hn::Eq(a.v, b.v)); } friend HwyUint32 operator!=(HwyUint32 a, HwyUint32 b) { const HwyUint32Tag du; return hn::VecFromMask(du, hn::Not(hn::Eq(a.v, b.v))); } friend HwyUint32 operator>(HwyUint32 a, HwyUint32 b) { const HwyUint32Tag du; return hn::VecFromMask(du, hn::Gt(a.v, b.v)); } friend HwyUint32 operator<(HwyUint32 a, HwyUint32 b) { const HwyUint32Tag du; return hn::VecFromMask(du, hn::Lt(a.v, b.v)); } friend HwyUint32 operator!(HwyUint32 a) { const HwyUint32Tag du; return hn::VecFromMask(du, hn::Eq(a.v, hn::Zero(du))); } }; // --- Deferred inline definitions --- // HwyFloat ↔ HwyInt32 conversion. inline HwyFloat::HwyFloat(HwyInt32 i) : v(hn::ConvertTo(HwyFloatTag{}, i.v)) {} // HwyInt32 ↔ HwyUint32 reinterpret (no-op). inline HwyInt32::HwyInt32(HwyUint32 u) : v(hn::BitCast(HwyInt32Tag{}, u.v)) {} // Map raw Highway vector to HwyFloat for SimdTypeFor. template <> struct SimdTypeFor> { using type = HwyFloat; }; // --- bit_cast specializations --- template <> inline HwyInt32 bit_cast(const HwyFloat& f) noexcept { return hn::BitCast(HwyInt32Tag{}, f.v); } template <> inline HwyUint32 bit_cast(const HwyFloat& f) noexcept { return hn::BitCast(HwyUint32Tag{}, f.v); } template <> inline HwyFloat bit_cast(const HwyInt32& i) noexcept { return hn::BitCast(HwyFloatTag{}, i.v); } template <> inline HwyFloat bit_cast(const HwyUint32& u) noexcept { return hn::BitCast(HwyFloatTag{}, u.v); } template <> inline HwyInt32 bit_cast(const HwyUint32& u) noexcept { return hn::BitCast(HwyInt32Tag{}, u.v); } template <> inline HwyUint32 bit_cast(const HwyInt32& i) noexcept { return hn::BitCast(HwyUint32Tag{}, i.v); } // --- FloatTraits --- template <> struct FloatTraits { using IntType = HwyInt32; using UintType = HwyUint32; using BoolType = HwyFloat; // Float comparison masks (lane-wide) static constexpr uint32_t kOne = 0x3f800000; static constexpr float kMagic = 12582912.f; static constexpr bool kBoolIsMask = true; static DISPENSO_INLINE HwyFloat sqrt(HwyFloat x) { return hn::Sqrt(x.v); } static DISPENSO_INLINE HwyFloat rcp(HwyFloat x) { return hn::ApproximateReciprocal(x.v); } // Highway always has FMA. static DISPENSO_INLINE HwyFloat fma(HwyFloat a, HwyFloat b, HwyFloat c) { return hn::MulAdd(a.v, b.v, c.v); } // conditional: select x where mask is true, y where false. 
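// Illustrative sketch (not part of the original header): because comparisons
// return lane-wide masks, conditional() is a branchless per-lane select, e.g.
//   HwyFloat absX = conditional(x < HwyFloat(0.f), -x, x);
// keeps x in lanes where it is non-negative and substitutes -x elsewhere.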
template static DISPENSO_INLINE Arg conditional(HwyFloat mask, Arg x, Arg y); template static DISPENSO_INLINE Arg conditional(HwyInt32 mask, Arg x, Arg y); template static DISPENSO_INLINE Arg apply(HwyFloat mask, Arg x); static DISPENSO_INLINE HwyFloat min(HwyFloat a, HwyFloat b) { return hn::Min(a.v, b.v); } static DISPENSO_INLINE HwyFloat max(HwyFloat a, HwyFloat b) { return hn::Max(a.v, b.v); } }; // conditional specializations (HwyFloat mask). // MaskFromVec extracts MSB: all-ones → true, all-zeros → false. template <> inline HwyFloat FloatTraits::conditional(HwyFloat mask, HwyFloat x, HwyFloat y) { auto m = hn::MaskFromVec(mask.v); return hn::IfThenElse(m, x.v, y.v); } template <> inline HwyInt32 FloatTraits::conditional(HwyFloat mask, HwyInt32 x, HwyInt32 y) { const HwyInt32Tag di; auto imask = hn::BitCast(di, mask.v); auto m = hn::MaskFromVec(imask); return hn::IfThenElse(m, x.v, y.v); } template <> inline HwyUint32 FloatTraits::conditional(HwyFloat mask, HwyUint32 x, HwyUint32 y) { const HwyUint32Tag du; auto umask = hn::BitCast(du, mask.v); auto m = hn::MaskFromVec(umask); return hn::IfThenElse(m, x.v, y.v); } // conditional specializations (HwyInt32 lane-wide mask). template <> inline HwyFloat FloatTraits::conditional(HwyInt32 mask, HwyFloat x, HwyFloat y) { const HwyFloatTag d; auto fmask = hn::BitCast(d, mask.v); auto m = hn::MaskFromVec(fmask); return hn::IfThenElse(m, x.v, y.v); } template <> inline HwyInt32 FloatTraits::conditional(HwyInt32 mask, HwyInt32 x, HwyInt32 y) { auto m = hn::MaskFromVec(mask.v); return hn::IfThenElse(m, x.v, y.v); } template <> inline HwyUint32 FloatTraits::conditional(HwyInt32 mask, HwyUint32 x, HwyUint32 y) { const HwyUint32Tag du; auto umask = hn::BitCast(du, mask.v); auto m = hn::MaskFromVec(umask); return hn::IfThenElse(m, x.v, y.v); } // apply: mask & x (bitwise AND). template <> inline HwyFloat FloatTraits::apply(HwyFloat mask, HwyFloat x) { const HwyFloatTag d; const HwyInt32Tag di; return hn::BitCast(d, hn::And(hn::BitCast(di, mask.v), hn::BitCast(di, x.v))); } template <> inline HwyInt32 FloatTraits::apply(HwyFloat mask, HwyInt32 x) { const HwyInt32Tag di; return hn::And(hn::BitCast(di, mask.v), x.v); } template <> inline HwyUint32 FloatTraits::apply(HwyFloat mask, HwyUint32 x) { const HwyUint32Tag du; return hn::And(hn::BitCast(du, mask.v), x.v); } template <> struct FloatTraits { using IntType = HwyInt32; }; template <> struct FloatTraits { using IntType = HwyUint32; }; // --- Util function overloads for Highway types --- DISPENSO_INLINE HwyFloat floor_small(HwyFloat x) { return hn::Floor(x.v); } DISPENSO_INLINE HwyInt32 convert_to_int_trunc(HwyFloat f) { const HwyInt32Tag di; // ConvertTo truncates toward zero (no Round needed). return hn::ConvertTo(di, f.v); } DISPENSO_INLINE HwyInt32 convert_to_int_trunc_safe(HwyFloat f) { const HwyInt32Tag di; auto converted = hn::ConvertTo(di, f.v); HwyInt32 fi = bit_cast(f); HwyInt32 norm = (fi & 0x7f800000) != 0x7f800000; return norm & HwyInt32(converted); } DISPENSO_INLINE HwyInt32 convert_to_int(HwyFloat f) { const HwyInt32Tag di; // Round to nearest even, then convert to int. auto rounded = hn::Round(f.v); auto converted = hn::ConvertTo(di, rounded); // Mask non-normals to 0 to avoid undefined behavior. HwyInt32 fi = bit_cast(f); HwyInt32 norm = (fi & 0x7f800000) != 0x7f800000; return norm & HwyInt32(converted); } template <> DISPENSO_INLINE HwyFloat min(HwyFloat x, HwyFloat mn) { // If x is NaN, return mn. Explicit NaN handling for portability. 
auto is_num = hn::Eq(x.v, x.v); // true for non-NaN auto result = hn::Min(x.v, mn.v); return hn::IfThenElse(is_num, result, mn.v); } template <> DISPENSO_INLINE HwyFloat clamp_allow_nan(HwyFloat x, HwyFloat mn, HwyFloat mx) { // NaN propagates: if x is NaN, result is NaN. auto is_nan = hn::Not(hn::Eq(x.v, x.v)); auto clamped = hn::Max(mn.v, hn::Min(mx.v, x.v)); return hn::IfThenElse(is_nan, x.v, clamped); } template <> DISPENSO_INLINE HwyFloat clamp_no_nan(HwyFloat x, HwyFloat mn, HwyFloat mx) { // Highway's Min/Max follow IEEE 754-2019 minimum/maximum semantics: if // either operand is NaN, the result is NaN. But we want NaN-suppressing // behavior (like x86 minps/maxps): if x is NaN, result should be mn. // Clamp(x) = Max(mn, Min(x, mx)). With NaN x: // Min(NaN, mx) = NaN → Max(mn, NaN) = NaN. Wrong — we want mn. // Fix: use ordered comparison to detect NaN lanes and blend. auto clamped = hn::Max(mn.v, hn::Min(x.v, mx.v)); auto x_valid = hn::Eq(x.v, x.v); // false for NaN lanes return hn::IfThenElse(x_valid, clamped, mn.v); } template <> DISPENSO_INLINE HwyFloat gather(const float* table, HwyInt32 index) { const HwyFloatTag d; return hn::GatherIndex(d, table, index.v); } DISPENSO_INLINE HwyInt32 int_div_by_3(HwyInt32 i) { // Multiply each 32-bit lane by 0x55555556 and take the high 32 bits. // Uses widening MulEven/MulOdd to stay in SIMD registers (no scalar fallback). const HwyUint32Tag du; const HwyInt32Tag di; auto ui = hn::BitCast(du, i.v); auto mul = hn::Set(du, 0x55555556u); // Even lanes (0, 2, ...): widening 32×32→64 multiply. auto even_prod = hn::MulEven(ui, mul); // Odd lanes (1, 3, ...): widening 32×32→64 multiply. auto odd_prod = hn::MulOdd(ui, mul); // Extract high 32 bits of each 64-bit product. auto even_hi = hn::ShiftRight<32>(even_prod); auto odd_hi = hn::ShiftRight<32>(odd_prod); // Interleave: odd results into upper 32 bits of each 64-bit lane, OR with even. auto combined = hn::Or(even_hi, hn::ShiftLeft<32>(odd_hi)); return hn::BitCast(di, combined); } // nonnormal/nonnormalOrZero: return HwyInt32 masks. DISPENSO_INLINE HwyInt32 nonnormal(HwyInt32 i) { return (i & 0x7f800000) == 0x7f800000; } DISPENSO_INLINE HwyInt32 nonnormalOrZero(HwyInt32 i) { auto m = i & 0x7f800000; return (m == 0x7f800000) | (m == 0); } DISPENSO_INLINE HwyInt32 nonnormal(HwyFloat f) { return nonnormal(bit_cast(f)); } // any_true: reduce SIMD mask to scalar bool (true if any lane is set). DISPENSO_INLINE bool any_true(HwyInt32 mask) { hn::ScalableTag d; return !hn::AllFalse(d, hn::Ne(mask.v, hn::Zero(d))); } DISPENSO_INLINE HwyFloat signof(HwyFloat x) { HwyUint32 xi = bit_cast(x); return bit_cast((xi & 0x80000000u) | FloatTraits::kOne); } DISPENSO_INLINE HwyInt32 signofi(HwyInt32 i) { return HwyInt32(1) - (HwyInt32(2) & (i < HwyInt32(0))); } // nbool_as_one: 0 if mask is true (all-ones), 1 if false (all-zeros). template <> DISPENSO_INLINE HwyFloat nbool_as_one(HwyFloat b) { const HwyFloatTag d; const HwyInt32Tag di; auto bi = hn::BitCast(di, b.v); auto one_bits = hn::Set(di, 0x3f800000); return hn::BitCast(d, hn::AndNot(bi, one_bits)); } template <> DISPENSO_INLINE HwyInt32 nbool_as_one(HwyFloat b) { const HwyInt32Tag di; auto bi = hn::BitCast(di, b.v); return hn::AndNot(bi, hn::Set(di, 1)); } template <> DISPENSO_INLINE HwyInt32 nbool_as_one(HwyInt32 b) { const HwyInt32Tag di; return hn::AndNot(b.v, hn::Set(di, 1)); } // bool_as_one: 1 if mask is true, 0 if false. 
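// Illustrative note (lane values assumed): a lane mask is all-ones or all-zeros,
// so ANDing it with the bit pattern of 1.0f (0x3f800000) yields 1.0f or 0.0f with
// no int->float conversion. E.g. lanes {0xFFFFFFFF, 0x00000000} give
// bool_as_one<HwyFloat>: {1.0f, 0.0f} and nbool_as_one<HwyFloat>: {0.0f, 1.0f}.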
template <> DISPENSO_INLINE HwyFloat bool_as_one(HwyFloat b) { const HwyFloatTag d; const HwyInt32Tag di; auto bi = hn::BitCast(di, b.v); return hn::BitCast(d, hn::And(bi, hn::Set(di, 0x3f800000))); } template <> DISPENSO_INLINE HwyInt32 bool_as_one(HwyFloat b) { const HwyInt32Tag di; auto bi = hn::BitCast(di, b.v); return hn::And(bi, hn::Set(di, 1)); } // bool_as_mask: for SIMD masks, identity (already a mask). template <> DISPENSO_INLINE HwyInt32 bool_as_mask(HwyFloat b) { return hn::BitCast(HwyInt32Tag{}, b.v); } template <> DISPENSO_INLINE HwyInt32 bool_as_mask(HwyInt32 b) { return b; } template <> DISPENSO_INLINE HwyInt32 bool_as_mask(HwyUint32 b) { return hn::BitCast(HwyInt32Tag{}, b.v); } template <> DISPENSO_INLINE HwyUint32 bool_as_mask(HwyFloat b) { return hn::BitCast(HwyUint32Tag{}, b.v); } template <> DISPENSO_INLINE HwyUint32 bool_as_mask(HwyUint32 b) { return b; } template <> DISPENSO_INLINE HwyUint32 bool_as_mask(HwyInt32 b) { return hn::BitCast(HwyUint32Tag{}, b.v); } } // namespace fast_math } // namespace dispenso #endif // __has_include("hwy/highway.h") ================================================ FILE: dispenso/fast_math/float_traits_neon.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #if defined(__aarch64__) #include #include #include "float_traits.h" #include "util.h" namespace dispenso { namespace fast_math { struct NeonInt32; struct NeonUint32; // 4-wide float SIMD wrapper around float32x4_t (AArch64 NEON). struct NeonFloat { float32x4_t v; NeonFloat() = default; NeonFloat(float32x4_t vec) : v(vec) {} // Implicit broadcast from scalar — required for polynomial constant propagation. NeonFloat(float f) : v(vdupq_n_f32(f)) {} // Explicit int→float conversion for (Flt)intExpr patterns. explicit NeonFloat(NeonInt32 i); NeonFloat operator-() const { return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v), vdupq_n_u32(0x80000000))); } NeonFloat& operator+=(NeonFloat o) { v = vaddq_f32(v, o.v); return *this; } NeonFloat& operator-=(NeonFloat o) { v = vsubq_f32(v, o.v); return *this; } NeonFloat& operator*=(NeonFloat o) { v = vmulq_f32(v, o.v); return *this; } NeonFloat& operator&=(NeonFloat o) { v = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(v), vreinterpretq_u32_f32(o.v))); return *this; } // Arithmetic (non-member friends for symmetric implicit conversions). friend NeonFloat operator+(NeonFloat a, NeonFloat b) { return vaddq_f32(a.v, b.v); } friend NeonFloat operator-(NeonFloat a, NeonFloat b) { return vsubq_f32(a.v, b.v); } friend NeonFloat operator*(NeonFloat a, NeonFloat b) { return vmulq_f32(a.v, b.v); } friend NeonFloat operator/(NeonFloat a, NeonFloat b) { return vdivq_f32(a.v, b.v); } // Comparisons return NeonFloat masks (all-ones or all-zeros per lane). // NEON comparisons produce uint32x4_t; reinterpret to float32x4_t. 
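// Illustrative example (lane values assumed): comparing a lane holding 1.0f against
// 2.0f with operator< yields 0xFFFFFFFF in that lane, while 4.0f < 3.0f yields
// 0x00000000; the resulting NeonFloat can be fed directly to conditional(), apply(),
// or combined with & / | like any other mask.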
friend NeonFloat operator<(NeonFloat a, NeonFloat b) { return vreinterpretq_f32_u32(vcltq_f32(a.v, b.v)); } friend NeonFloat operator>(NeonFloat a, NeonFloat b) { return vreinterpretq_f32_u32(vcgtq_f32(a.v, b.v)); } friend NeonFloat operator<=(NeonFloat a, NeonFloat b) { return vreinterpretq_f32_u32(vcleq_f32(a.v, b.v)); } friend NeonFloat operator>=(NeonFloat a, NeonFloat b) { return vreinterpretq_f32_u32(vcgeq_f32(a.v, b.v)); } friend NeonFloat operator==(NeonFloat a, NeonFloat b) { return vreinterpretq_f32_u32(vceqq_f32(a.v, b.v)); } friend NeonFloat operator!=(NeonFloat a, NeonFloat b) { return vreinterpretq_f32_u32(vmvnq_u32(vceqq_f32(a.v, b.v))); } // Logical NOT of a comparison mask. friend NeonFloat operator!(NeonFloat a) { return vreinterpretq_f32_u32(vmvnq_u32(vreinterpretq_u32_f32(a.v))); } // Bitwise ops on float masks. friend NeonFloat operator&(NeonFloat a, NeonFloat b) { return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v))); } friend NeonFloat operator|(NeonFloat a, NeonFloat b) { return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v))); } friend NeonFloat operator^(NeonFloat a, NeonFloat b) { return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v))); } }; // 4-wide int32 SIMD wrapper around int32x4_t. struct NeonInt32 { int32x4_t v; NeonInt32() = default; NeonInt32(int32x4_t vec) : v(vec) {} NeonInt32(int32_t i) : v(vdupq_n_s32(i)) {} // Reinterpret from NeonUint32 (no-op). NeonInt32(NeonUint32 u); NeonInt32 operator-() const { return vnegq_s32(v); } NeonInt32& operator+=(NeonInt32 o) { v = vaddq_s32(v, o.v); return *this; } NeonInt32& operator-=(NeonInt32 o) { v = vsubq_s32(v, o.v); return *this; } NeonInt32& operator*=(NeonInt32 o) { v = vmulq_s32(v, o.v); return *this; } NeonInt32& operator&=(NeonInt32 o) { v = vandq_s32(v, o.v); return *this; } NeonInt32& operator|=(NeonInt32 o) { v = vorrq_s32(v, o.v); return *this; } // Shifts (NEON uses vshlq with signed shift amount: positive=left, negative=right). NeonInt32 operator<<(int n) const { return vshlq_s32(v, vdupq_n_s32(n)); } NeonInt32 operator>>(int n) const { return vshlq_s32(v, vdupq_n_s32(-n)); // Arithmetic right shift } NeonInt32& operator<<=(int n) { v = vshlq_s32(v, vdupq_n_s32(n)); return *this; } NeonInt32& operator>>=(int n) { v = vshlq_s32(v, vdupq_n_s32(-n)); return *this; } // Arithmetic (non-member friends for symmetric implicit conversions). friend NeonInt32 operator+(NeonInt32 a, NeonInt32 b) { return vaddq_s32(a.v, b.v); } friend NeonInt32 operator-(NeonInt32 a, NeonInt32 b) { return vsubq_s32(a.v, b.v); } friend NeonInt32 operator*(NeonInt32 a, NeonInt32 b) { return vmulq_s32(a.v, b.v); } // Bitwise. friend NeonInt32 operator&(NeonInt32 a, NeonInt32 b) { return vandq_s32(a.v, b.v); } friend NeonInt32 operator|(NeonInt32 a, NeonInt32 b) { return vorrq_s32(a.v, b.v); } friend NeonInt32 operator^(NeonInt32 a, NeonInt32 b) { return veorq_s32(a.v, b.v); } friend NeonInt32 operator~(NeonInt32 a) { return vmvnq_s32(a.v); } // Comparisons return NeonInt32 masks (reinterpreted from uint32x4_t). 
friend NeonInt32 operator==(NeonInt32 a, NeonInt32 b) { return vreinterpretq_s32_u32(vceqq_s32(a.v, b.v)); } friend NeonInt32 operator!=(NeonInt32 a, NeonInt32 b) { return vreinterpretq_s32_u32(vmvnq_u32(vceqq_s32(a.v, b.v))); } friend NeonInt32 operator<(NeonInt32 a, NeonInt32 b) { return vreinterpretq_s32_u32(vcltq_s32(a.v, b.v)); } friend NeonInt32 operator>(NeonInt32 a, NeonInt32 b) { return vreinterpretq_s32_u32(vcgtq_s32(a.v, b.v)); } friend NeonInt32 operator!(NeonInt32 a) { return vreinterpretq_s32_u32(vceqq_s32(a.v, vdupq_n_s32(0))); } }; // 4-wide uint32 SIMD wrapper around uint32x4_t. struct NeonUint32 { uint32x4_t v; NeonUint32() = default; NeonUint32(uint32x4_t vec) : v(vec) {} NeonUint32(uint32_t u) : v(vdupq_n_u32(u)) {} // Reinterpret from NeonInt32 (no-op). NeonUint32(NeonInt32 i) : v(vreinterpretq_u32_s32(i.v)) {} NeonUint32& operator+=(NeonUint32 o) { v = vaddq_u32(v, o.v); return *this; } NeonUint32& operator-=(NeonUint32 o) { v = vsubq_u32(v, o.v); return *this; } NeonUint32& operator*=(NeonUint32 o) { v = vmulq_u32(v, o.v); return *this; } NeonUint32& operator&=(NeonUint32 o) { v = vandq_u32(v, o.v); return *this; } NeonUint32& operator|=(NeonUint32 o) { v = vorrq_u32(v, o.v); return *this; } // Shifts. NeonUint32 operator<<(int n) const { return vshlq_u32(v, vdupq_n_s32(n)); } NeonUint32 operator>>(int n) const { return vshlq_u32(v, vdupq_n_s32(-n)); // Logical right shift } NeonUint32& operator<<=(int n) { v = vshlq_u32(v, vdupq_n_s32(n)); return *this; } NeonUint32& operator>>=(int n) { v = vshlq_u32(v, vdupq_n_s32(-n)); return *this; } // Arithmetic (non-member friends for symmetric implicit conversions). friend NeonUint32 operator+(NeonUint32 a, NeonUint32 b) { return vaddq_u32(a.v, b.v); } friend NeonUint32 operator-(NeonUint32 a, NeonUint32 b) { return vsubq_u32(a.v, b.v); } friend NeonUint32 operator*(NeonUint32 a, NeonUint32 b) { return vmulq_u32(a.v, b.v); } // Bitwise. friend NeonUint32 operator&(NeonUint32 a, NeonUint32 b) { return vandq_u32(a.v, b.v); } friend NeonUint32 operator|(NeonUint32 a, NeonUint32 b) { return vorrq_u32(a.v, b.v); } friend NeonUint32 operator^(NeonUint32 a, NeonUint32 b) { return veorq_u32(a.v, b.v); } friend NeonUint32 operator~(NeonUint32 a) { return vmvnq_u32(a.v); } // NEON has native unsigned comparisons. friend NeonUint32 operator==(NeonUint32 a, NeonUint32 b) { return vceqq_u32(a.v, b.v); } friend NeonUint32 operator!=(NeonUint32 a, NeonUint32 b) { return vmvnq_u32(vceqq_u32(a.v, b.v)); } friend NeonUint32 operator>(NeonUint32 a, NeonUint32 b) { return vcgtq_u32(a.v, b.v); } friend NeonUint32 operator<(NeonUint32 a, NeonUint32 b) { return vcltq_u32(a.v, b.v); } friend NeonUint32 operator!(NeonUint32 a) { return vceqq_u32(a.v, vdupq_n_u32(0)); } }; // --- Deferred inline definitions --- // NeonFloat ↔ NeonInt32 conversion. inline NeonFloat::NeonFloat(NeonInt32 i) : v(vcvtq_f32_s32(i.v)) {} // NeonInt32 ↔ NeonUint32 reinterpret (no-op). inline NeonInt32::NeonInt32(NeonUint32 u) : v(vreinterpretq_s32_u32(u.v)) {} // Map raw float32x4_t to NeonFloat for SimdTypeFor. 
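// Illustrative sketch (an assumption about the generic helpers in util.h): the
// SimdTypeFor mapping below lets raw intrinsic vectors flow through the generic
// functions, e.g.
//   float32x4_t y = dispenso::fast_math::fabs(x);  // x is a raw float32x4_t
// which wraps x in NeonFloat, applies the wrapper overload, and returns .v.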
template <> struct SimdTypeFor { using type = NeonFloat; }; // --- bit_cast specializations --- template <> inline NeonInt32 bit_cast(const NeonFloat& f) noexcept { return vreinterpretq_s32_f32(f.v); } template <> inline NeonUint32 bit_cast(const NeonFloat& f) noexcept { return vreinterpretq_u32_f32(f.v); } template <> inline NeonFloat bit_cast(const NeonInt32& i) noexcept { return vreinterpretq_f32_s32(i.v); } template <> inline NeonFloat bit_cast(const NeonUint32& u) noexcept { return vreinterpretq_f32_u32(u.v); } template <> inline NeonInt32 bit_cast(const NeonUint32& u) noexcept { return vreinterpretq_s32_u32(u.v); } template <> inline NeonUint32 bit_cast(const NeonInt32& i) noexcept { return vreinterpretq_u32_s32(i.v); } // --- FloatTraits --- template <> struct FloatTraits { using IntType = NeonInt32; using UintType = NeonUint32; using BoolType = NeonFloat; // Float comparison masks (lane-wide, like SSE) static constexpr uint32_t kOne = 0x3f800000; static constexpr float kMagic = 12582912.f; static constexpr bool kBoolIsMask = true; static DISPENSO_INLINE NeonFloat sqrt(NeonFloat x) { return vsqrtq_f32(x.v); } static DISPENSO_INLINE NeonFloat rcp(NeonFloat x) { return vrecpeq_f32(x.v); } // AArch64 always has FMA. static DISPENSO_INLINE NeonFloat fma(NeonFloat a, NeonFloat b, NeonFloat c) { return vfmaq_f32(c.v, a.v, b.v); // vfmaq_f32(acc, a, b) = acc + a*b } // conditional: select x where mask is true, y where false. template static DISPENSO_INLINE Arg conditional(NeonFloat mask, Arg x, Arg y); template static DISPENSO_INLINE Arg conditional(NeonInt32 mask, Arg x, Arg y); template static DISPENSO_INLINE Arg apply(NeonFloat mask, Arg x); static DISPENSO_INLINE NeonFloat min(NeonFloat a, NeonFloat b) { return vminnmq_f32(a.v, b.v); } static DISPENSO_INLINE NeonFloat max(NeonFloat a, NeonFloat b) { return vmaxnmq_f32(a.v, b.v); } }; // conditional specializations (NeonFloat mask). // vbslq selects from first operand where mask bits are 1, second where 0. template <> inline NeonFloat FloatTraits::conditional(NeonFloat mask, NeonFloat x, NeonFloat y) { return vbslq_f32(vreinterpretq_u32_f32(mask.v), x.v, y.v); } template <> inline NeonInt32 FloatTraits::conditional(NeonFloat mask, NeonInt32 x, NeonInt32 y) { return vreinterpretq_s32_u32(vbslq_u32( vreinterpretq_u32_f32(mask.v), vreinterpretq_u32_s32(x.v), vreinterpretq_u32_s32(y.v))); } template <> inline NeonUint32 FloatTraits::conditional(NeonFloat mask, NeonUint32 x, NeonUint32 y) { return vbslq_u32(vreinterpretq_u32_f32(mask.v), x.v, y.v); } // conditional specializations (NeonInt32 lane-wide mask). template <> inline NeonFloat FloatTraits::conditional(NeonInt32 mask, NeonFloat x, NeonFloat y) { return vbslq_f32(vreinterpretq_u32_s32(mask.v), x.v, y.v); } template <> inline NeonInt32 FloatTraits::conditional(NeonInt32 mask, NeonInt32 x, NeonInt32 y) { return vreinterpretq_s32_u32(vbslq_u32( vreinterpretq_u32_s32(mask.v), vreinterpretq_u32_s32(x.v), vreinterpretq_u32_s32(y.v))); } template <> inline NeonUint32 FloatTraits::conditional(NeonInt32 mask, NeonUint32 x, NeonUint32 y) { return vbslq_u32(vreinterpretq_u32_s32(mask.v), x.v, y.v); } // apply: mask & x (bitwise AND). 
template <> inline NeonFloat FloatTraits::apply(NeonFloat mask, NeonFloat x) { return vreinterpretq_f32_u32( vandq_u32(vreinterpretq_u32_f32(mask.v), vreinterpretq_u32_f32(x.v))); } template <> inline NeonInt32 FloatTraits::apply(NeonFloat mask, NeonInt32 x) { return vreinterpretq_s32_u32( vandq_u32(vreinterpretq_u32_f32(mask.v), vreinterpretq_u32_s32(x.v))); } template <> inline NeonUint32 FloatTraits::apply(NeonFloat mask, NeonUint32 x) { return vandq_u32(vreinterpretq_u32_f32(mask.v), x.v); } template <> struct FloatTraits { using IntType = NeonInt32; }; template <> struct FloatTraits { using IntType = NeonUint32; }; // --- Util function overloads for NEON types --- DISPENSO_INLINE NeonFloat floor_small(NeonFloat x) { return vrndmq_f32(x.v); } DISPENSO_INLINE NeonInt32 convert_to_int_trunc(NeonFloat f) { return vcvtq_s32_f32(f.v); } DISPENSO_INLINE NeonInt32 convert_to_int_trunc_safe(NeonFloat f) { NeonInt32 fi = bit_cast(f); NeonInt32 norm = (fi & 0x7f800000) != 0x7f800000; return norm & NeonInt32(vcvtq_s32_f32(f.v)); } DISPENSO_INLINE NeonInt32 convert_to_int(NeonFloat f) { // vcvtnq_s32_f32 uses round-to-nearest-even. // Mask non-normals to 0 to avoid undefined behavior. NeonInt32 fi = bit_cast(f); NeonInt32 norm = (fi & 0x7f800000) != 0x7f800000; return norm & NeonInt32(vcvtnq_s32_f32(f.v)); } template <> DISPENSO_INLINE NeonFloat min(NeonFloat x, NeonFloat mn) { // vminnmq_f32: if x is NaN, returns mn (the number). NaN-suppressing. return vminnmq_f32(x.v, mn.v); } template <> DISPENSO_INLINE NeonFloat clamp_allow_nan(NeonFloat x, NeonFloat mn, NeonFloat mx) { // Use NaN-propagating min/max (FMIN/FMAX) so NaN passes through. return vmaxq_f32(mn.v, vminq_f32(mx.v, x.v)); } template <> DISPENSO_INLINE NeonFloat clamp_no_nan(NeonFloat x, NeonFloat mn, NeonFloat mx) { // Use NaN-suppressing min/max (FMINNM/FMAXNM) so NaN is replaced. return vmaxnmq_f32(mn.v, vminnmq_f32(x.v, mx.v)); } template <> DISPENSO_INLINE NeonFloat gather(const float* table, NeonInt32 index) { // No NEON gather instruction; extract indices and load scalar. alignas(16) int32_t idx[4]; vst1q_s32(idx, index.v); float vals[4] = {table[idx[0]], table[idx[1]], table[idx[2]], table[idx[3]]}; return vld1q_f32(vals); } DISPENSO_INLINE NeonInt32 int_div_by_3(NeonInt32 i) { // Multiply each lane by 0x55555556 and take the high 32 bits. // Process low (lanes 0,1) and high (lanes 2,3) halves separately. uint32x4_t ui = vreinterpretq_u32_s32(i.v); uint32x2_t mul = vdup_n_u32(0x55555556); // Low lanes (0, 1). uint64x2_t lo_prod = vmull_u32(vget_low_u32(ui), mul); uint32x2_t lo_result = vshrn_n_u64(lo_prod, 32); // High lanes (2, 3). uint64x2_t hi_prod = vmull_u32(vget_high_u32(ui), mul); uint32x2_t hi_result = vshrn_n_u64(hi_prod, 32); return vreinterpretq_s32_u32(vcombine_u32(lo_result, hi_result)); } // nonnormal/nonnormalOrZero: return NeonInt32 masks for NEON types. DISPENSO_INLINE NeonInt32 nonnormal(NeonInt32 i) { return (i & 0x7f800000) == 0x7f800000; } DISPENSO_INLINE NeonInt32 nonnormalOrZero(NeonInt32 i) { auto m = i & 0x7f800000; return (m == 0x7f800000) | (m == 0); } DISPENSO_INLINE NeonInt32 nonnormal(NeonFloat f) { return nonnormal(bit_cast(f)); } // any_true: reduce SIMD mask to scalar bool (true if any lane is set). 
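// Illustrative usage (a sketch, not from this header): any_true() lets vector code
// fall back to a scalar path only when some lane actually needs it, e.g.
//   if (any_true(nonnormal(x))) { /* handle inf/NaN lanes separately */ }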
DISPENSO_INLINE bool any_true(NeonInt32 mask) { return vmaxvq_u32(vreinterpretq_u32_s32(mask.v)) != 0; } DISPENSO_INLINE NeonFloat signof(NeonFloat x) { NeonUint32 xi = bit_cast(x); return bit_cast((xi & 0x80000000u) | FloatTraits::kOne); } DISPENSO_INLINE NeonInt32 signofi(NeonInt32 i) { return NeonInt32(1) - (NeonInt32(2) & (i < NeonInt32(0))); } // nbool_as_one: 0 if mask is true (all-ones), 1 if false (all-zeros). template <> DISPENSO_INLINE NeonFloat nbool_as_one(NeonFloat b) { // ~mask & 1.0f bits return bit_cast(NeonInt32( vreinterpretq_s32_u32(vbicq_u32(vdupq_n_u32(0x3f800000), vreinterpretq_u32_f32(b.v))))); } template <> DISPENSO_INLINE NeonInt32 nbool_as_one(NeonFloat b) { return vreinterpretq_s32_u32(vbicq_u32(vdupq_n_u32(1), vreinterpretq_u32_f32(b.v))); } template <> DISPENSO_INLINE NeonInt32 nbool_as_one(NeonInt32 b) { return vreinterpretq_s32_u32(vbicq_u32(vdupq_n_u32(1), vreinterpretq_u32_s32(b.v))); } // bool_as_one: 1 if mask is true, 0 if false. template <> DISPENSO_INLINE NeonFloat bool_as_one(NeonFloat b) { return bit_cast(NeonInt32( vreinterpretq_s32_u32(vandq_u32(vreinterpretq_u32_f32(b.v), vdupq_n_u32(0x3f800000))))); } template <> DISPENSO_INLINE NeonInt32 bool_as_one(NeonFloat b) { return vreinterpretq_s32_u32(vandq_u32(vreinterpretq_u32_f32(b.v), vdupq_n_u32(1))); } // bool_as_mask: for SIMD masks, identity (already a mask). template <> DISPENSO_INLINE NeonInt32 bool_as_mask(NeonFloat b) { return vreinterpretq_s32_f32(b.v); } template <> DISPENSO_INLINE NeonInt32 bool_as_mask(NeonInt32 b) { return b; } template <> DISPENSO_INLINE NeonInt32 bool_as_mask(NeonUint32 b) { return vreinterpretq_s32_u32(b.v); } template <> DISPENSO_INLINE NeonUint32 bool_as_mask(NeonFloat b) { return vreinterpretq_u32_f32(b.v); } template <> DISPENSO_INLINE NeonUint32 bool_as_mask(NeonUint32 b) { return b; } template <> DISPENSO_INLINE NeonUint32 bool_as_mask(NeonInt32 b) { return vreinterpretq_u32_s32(b.v); } } // namespace fast_math } // namespace dispenso #endif // defined(__aarch64__) ================================================ FILE: dispenso/fast_math/float_traits_x86.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #if defined(__SSE4_1__) #include #include #include "float_traits.h" #include "util.h" namespace dispenso { namespace fast_math { struct SseInt32; struct SseUint32; // 4-wide float SIMD wrapper around __m128. struct SseFloat { __m128 v; SseFloat() = default; SseFloat(__m128 vec) : v(vec) {} // Implicit broadcast from scalar — required for polynomial constant propagation. SseFloat(float f) : v(_mm_set1_ps(f)) {} // Explicit int→float conversion for (Flt)intExpr patterns. explicit SseFloat(SseInt32 i); // Implicit conversion back to raw intrinsic type. operator __m128() const { return v; } SseFloat operator-() const { return _mm_xor_ps(v, _mm_set1_ps(-0.0f)); } SseFloat& operator+=(SseFloat o) { v = _mm_add_ps(v, o.v); return *this; } SseFloat& operator-=(SseFloat o) { v = _mm_sub_ps(v, o.v); return *this; } SseFloat& operator*=(SseFloat o) { v = _mm_mul_ps(v, o.v); return *this; } SseFloat& operator&=(SseFloat o) { v = _mm_and_ps(v, o.v); return *this; } // Arithmetic (non-member friends for symmetric implicit conversions). 
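// Illustrative note: defining these as non-member friends means the implicit
// float broadcast applies to either operand, so both `x * 0.5f` and `0.5f * x`
// compile for an SseFloat x; member operators would only convert the right-hand side.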
friend SseFloat operator+(SseFloat a, SseFloat b) { return _mm_add_ps(a.v, b.v); } friend SseFloat operator-(SseFloat a, SseFloat b) { return _mm_sub_ps(a.v, b.v); } friend SseFloat operator*(SseFloat a, SseFloat b) { return _mm_mul_ps(a.v, b.v); } friend SseFloat operator/(SseFloat a, SseFloat b) { return _mm_div_ps(a.v, b.v); } // Comparisons return SseFloat masks (all-ones or all-zeros per lane). friend SseFloat operator<(SseFloat a, SseFloat b) { return _mm_cmplt_ps(a.v, b.v); } friend SseFloat operator>(SseFloat a, SseFloat b) { return _mm_cmpgt_ps(a.v, b.v); } friend SseFloat operator<=(SseFloat a, SseFloat b) { return _mm_cmple_ps(a.v, b.v); } friend SseFloat operator>=(SseFloat a, SseFloat b) { return _mm_cmpge_ps(a.v, b.v); } friend SseFloat operator==(SseFloat a, SseFloat b) { return _mm_cmpeq_ps(a.v, b.v); } friend SseFloat operator!=(SseFloat a, SseFloat b) { return _mm_cmpneq_ps(a.v, b.v); } // Logical NOT of a comparison mask. friend SseFloat operator!(SseFloat a) { return _mm_xor_ps(a.v, _mm_castsi128_ps(_mm_set1_epi32(-1))); } // Bitwise ops on float masks. friend SseFloat operator&(SseFloat a, SseFloat b) { return _mm_and_ps(a.v, b.v); } friend SseFloat operator|(SseFloat a, SseFloat b) { return _mm_or_ps(a.v, b.v); } friend SseFloat operator^(SseFloat a, SseFloat b) { return _mm_xor_ps(a.v, b.v); } }; // 4-wide int32 SIMD wrapper around __m128i. struct SseInt32 { __m128i v; SseInt32() = default; SseInt32(__m128i vec) : v(vec) {} SseInt32(int32_t i) : v(_mm_set1_epi32(i)) {} // Reinterpret from SseUint32 (no-op on __m128i). SseInt32(SseUint32 u); // Implicit conversion back to raw intrinsic type. operator __m128i() const { return v; } SseInt32 operator-() const { return _mm_sub_epi32(_mm_setzero_si128(), v); } SseInt32& operator+=(SseInt32 o) { v = _mm_add_epi32(v, o.v); return *this; } SseInt32& operator-=(SseInt32 o) { v = _mm_sub_epi32(v, o.v); return *this; } SseInt32& operator*=(SseInt32 o) { v = _mm_mullo_epi32(v, o.v); return *this; } SseInt32& operator&=(SseInt32 o) { v = _mm_and_si128(v, o.v); return *this; } SseInt32& operator|=(SseInt32 o) { v = _mm_or_si128(v, o.v); return *this; } // Shifts. SseInt32 operator<<(int n) const { return _mm_slli_epi32(v, n); } SseInt32 operator>>(int n) const { return _mm_srai_epi32(v, n); // Arithmetic shift right } SseInt32& operator<<=(int n) { v = _mm_slli_epi32(v, n); return *this; } SseInt32& operator>>=(int n) { v = _mm_srai_epi32(v, n); return *this; } // Arithmetic (non-member friends for symmetric implicit conversions). friend SseInt32 operator+(SseInt32 a, SseInt32 b) { return _mm_add_epi32(a.v, b.v); } friend SseInt32 operator-(SseInt32 a, SseInt32 b) { return _mm_sub_epi32(a.v, b.v); } friend SseInt32 operator*(SseInt32 a, SseInt32 b) { return _mm_mullo_epi32(a.v, b.v); // SSE4.1 } // Bitwise. friend SseInt32 operator&(SseInt32 a, SseInt32 b) { return _mm_and_si128(a.v, b.v); } friend SseInt32 operator|(SseInt32 a, SseInt32 b) { return _mm_or_si128(a.v, b.v); } friend SseInt32 operator^(SseInt32 a, SseInt32 b) { return _mm_xor_si128(a.v, b.v); } friend SseInt32 operator~(SseInt32 a) { return _mm_xor_si128(a.v, _mm_set1_epi32(-1)); } // Comparisons return SseInt32 masks. 
friend SseInt32 operator==(SseInt32 a, SseInt32 b) { return _mm_cmpeq_epi32(a.v, b.v); } friend SseInt32 operator!=(SseInt32 a, SseInt32 b) { return _mm_xor_si128(_mm_cmpeq_epi32(a.v, b.v), _mm_set1_epi32(-1)); } friend SseInt32 operator<(SseInt32 a, SseInt32 b) { return _mm_cmplt_epi32(a.v, b.v); } friend SseInt32 operator>(SseInt32 a, SseInt32 b) { return _mm_cmpgt_epi32(a.v, b.v); } friend SseInt32 operator!(SseInt32 a) { return _mm_cmpeq_epi32(a.v, _mm_setzero_si128()); } }; // 4-wide uint32 SIMD wrapper around __m128i. struct SseUint32 { __m128i v; SseUint32() = default; SseUint32(__m128i vec) : v(vec) {} SseUint32(uint32_t u) : v(_mm_set1_epi32(static_cast(u))) {} // Reinterpret from SseInt32 (no-op on __m128i). SseUint32(SseInt32 i) : v(i.v) {} // Implicit conversion back to raw intrinsic type. operator __m128i() const { return v; } SseUint32& operator+=(SseUint32 o) { v = _mm_add_epi32(v, o.v); return *this; } SseUint32& operator-=(SseUint32 o) { v = _mm_sub_epi32(v, o.v); return *this; } SseUint32& operator*=(SseUint32 o) { v = _mm_mullo_epi32(v, o.v); return *this; } SseUint32& operator&=(SseUint32 o) { v = _mm_and_si128(v, o.v); return *this; } SseUint32& operator|=(SseUint32 o) { v = _mm_or_si128(v, o.v); return *this; } // Shifts. SseUint32 operator<<(int n) const { return _mm_slli_epi32(v, n); } SseUint32 operator>>(int n) const { return _mm_srli_epi32(v, n); // Logical shift right } SseUint32& operator<<=(int n) { v = _mm_slli_epi32(v, n); return *this; } SseUint32& operator>>=(int n) { v = _mm_srli_epi32(v, n); return *this; } // Arithmetic (non-member friends for symmetric implicit conversions). friend SseUint32 operator+(SseUint32 a, SseUint32 b) { return _mm_add_epi32(a.v, b.v); } friend SseUint32 operator-(SseUint32 a, SseUint32 b) { return _mm_sub_epi32(a.v, b.v); } friend SseUint32 operator*(SseUint32 a, SseUint32 b) { return _mm_mullo_epi32(a.v, b.v); } // Bitwise. friend SseUint32 operator&(SseUint32 a, SseUint32 b) { return _mm_and_si128(a.v, b.v); } friend SseUint32 operator|(SseUint32 a, SseUint32 b) { return _mm_or_si128(a.v, b.v); } friend SseUint32 operator^(SseUint32 a, SseUint32 b) { return _mm_xor_si128(a.v, b.v); } friend SseUint32 operator~(SseUint32 a) { return _mm_xor_si128(a.v, _mm_set1_epi32(-1)); } // Unsigned comparisons need XOR with sign bit to convert to signed domain. friend SseUint32 operator==(SseUint32 a, SseUint32 b) { return _mm_cmpeq_epi32(a.v, b.v); } friend SseUint32 operator!=(SseUint32 a, SseUint32 b) { return _mm_xor_si128(_mm_cmpeq_epi32(a.v, b.v), _mm_set1_epi32(-1)); } friend SseUint32 operator>(SseUint32 a, SseUint32 b) { __m128i bias = _mm_set1_epi32(static_cast(0x80000000u)); return _mm_cmpgt_epi32(_mm_xor_si128(a.v, bias), _mm_xor_si128(b.v, bias)); } friend SseUint32 operator<(SseUint32 a, SseUint32 b) { return b > a; } friend SseUint32 operator!(SseUint32 a) { return _mm_cmpeq_epi32(a.v, _mm_setzero_si128()); } }; // SseFloat ↔ SseInt32 conversion. inline SseFloat::SseFloat(SseInt32 i) : v(_mm_cvtepi32_ps(i.v)) {} // SseInt32 ↔ SseUint32 reinterpret (no-op on __m128i). inline SseInt32::SseInt32(SseUint32 u) : v(u.v) {} // Map raw __m128 to SseFloat for SimdTypeFor. 
template <> struct SimdTypeFor<__m128> { using type = SseFloat; }; // --- bit_cast specializations --- template <> inline SseInt32 bit_cast(const SseFloat& f) noexcept { return _mm_castps_si128(f.v); } template <> inline SseUint32 bit_cast(const SseFloat& f) noexcept { return _mm_castps_si128(f.v); } template <> inline SseFloat bit_cast(const SseInt32& i) noexcept { return _mm_castsi128_ps(i.v); } template <> inline SseFloat bit_cast(const SseUint32& u) noexcept { return _mm_castsi128_ps(u.v); } template <> inline SseInt32 bit_cast(const SseUint32& u) noexcept { return u.v; } template <> inline SseUint32 bit_cast(const SseInt32& i) noexcept { return i.v; } // --- FloatTraits --- template <> struct FloatTraits { using IntType = SseInt32; using UintType = SseUint32; using BoolType = SseFloat; // Float comparison masks static constexpr uint32_t kOne = 0x3f800000; static constexpr float kMagic = 12582912.f; static constexpr bool kBoolIsMask = true; static DISPENSO_INLINE SseFloat sqrt(SseFloat x) { return _mm_sqrt_ps(x.v); } static DISPENSO_INLINE SseFloat rcp(SseFloat x) { return _mm_rcp_ps(x.v); } static DISPENSO_INLINE SseFloat fma(SseFloat a, SseFloat b, SseFloat c) { #if defined(__FMA__) return _mm_fmadd_ps(a.v, b.v, c.v); #else return _mm_add_ps(_mm_mul_ps(a.v, b.v), c.v); #endif } // conditional: select x where mask is true, y where false. template static DISPENSO_INLINE Arg conditional(SseFloat mask, Arg x, Arg y); template static DISPENSO_INLINE Arg conditional(SseInt32 mask, Arg x, Arg y); template static DISPENSO_INLINE Arg apply(SseFloat mask, Arg x); static DISPENSO_INLINE SseFloat min(SseFloat a, SseFloat b) { return _mm_min_ps(a.v, b.v); } static DISPENSO_INLINE SseFloat max(SseFloat a, SseFloat b) { return _mm_max_ps(a.v, b.v); } }; // conditional specializations. template <> inline SseFloat FloatTraits::conditional(SseFloat mask, SseFloat x, SseFloat y) { return _mm_blendv_ps(y.v, x.v, mask.v); } template <> inline SseInt32 FloatTraits::conditional(SseFloat mask, SseInt32 x, SseInt32 y) { return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(y.v), _mm_castsi128_ps(x.v), mask.v)); } template <> inline SseUint32 FloatTraits::conditional(SseFloat mask, SseUint32 x, SseUint32 y) { return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(y.v), _mm_castsi128_ps(x.v), mask.v)); } template <> inline SseFloat FloatTraits::conditional(SseInt32 mask, SseFloat x, SseFloat y) { return _mm_blendv_ps(y.v, x.v, _mm_castsi128_ps(mask.v)); } template <> inline SseInt32 FloatTraits::conditional(SseInt32 mask, SseInt32 x, SseInt32 y) { return _mm_castps_si128( _mm_blendv_ps(_mm_castsi128_ps(y.v), _mm_castsi128_ps(x.v), _mm_castsi128_ps(mask.v))); } template <> inline SseUint32 FloatTraits::conditional(SseInt32 mask, SseUint32 x, SseUint32 y) { return _mm_castps_si128( _mm_blendv_ps(_mm_castsi128_ps(y.v), _mm_castsi128_ps(x.v), _mm_castsi128_ps(mask.v))); } // apply: mask & x (bitwise AND). 
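// Illustrative example (a sketch): apply() zeroes the lanes where the mask is false,
// e.g.
//   SseFloat r = FloatTraits<SseFloat>::apply(x > SseFloat(0.f), x);
// keeps positive lanes of x and forces the remaining lanes to all-zero bits (+0.0f).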
template <> inline SseFloat FloatTraits::apply(SseFloat mask, SseFloat x) { return _mm_and_ps(mask.v, x.v); } template <> inline SseInt32 FloatTraits::apply(SseFloat mask, SseInt32 x) { return _mm_and_si128(_mm_castps_si128(mask.v), x.v); } template <> inline SseUint32 FloatTraits::apply(SseFloat mask, SseUint32 x) { return _mm_and_si128(_mm_castps_si128(mask.v), x.v); } template <> struct FloatTraits { using IntType = SseInt32; }; template <> struct FloatTraits { using IntType = SseUint32; }; // --- Util function overloads for SSE types --- DISPENSO_INLINE SseFloat floor_small(SseFloat x) { return _mm_floor_ps(x.v); } DISPENSO_INLINE SseInt32 convert_to_int_trunc(SseFloat f) { return _mm_cvttps_epi32(f.v); } DISPENSO_INLINE SseInt32 convert_to_int_trunc_safe(SseFloat f) { SseInt32 fi = bit_cast(f); SseInt32 norm = (fi & 0x7f800000) != 0x7f800000; return norm & SseInt32(_mm_cvttps_epi32(f.v)); } DISPENSO_INLINE SseInt32 convert_to_int(SseFloat f) { // _mm_cvtps_epi32 uses round-to-nearest-even. // Mask non-normals to 0 to avoid undefined behavior. SseInt32 fi = bit_cast(f); SseInt32 norm = (fi & 0x7f800000) != 0x7f800000; return norm & SseInt32(_mm_cvtps_epi32(f.v)); } template <> DISPENSO_INLINE SseFloat min(SseFloat x, SseFloat mn) { // Ordering: if x is NaN, result is mn (second operand). return _mm_min_ps(x.v, mn.v); } template <> DISPENSO_INLINE SseFloat clamp_allow_nan(SseFloat x, SseFloat mn, SseFloat mx) { return _mm_max_ps(mn.v, _mm_min_ps(mx.v, x.v)); } template <> DISPENSO_INLINE SseFloat clamp_no_nan(SseFloat x, SseFloat mn, SseFloat mx) { return _mm_max_ps(mn.v, _mm_min_ps(x.v, mx.v)); } template <> DISPENSO_INLINE SseFloat gather(const float* table, SseInt32 index) { alignas(16) int32_t idx[4]; _mm_store_si128(reinterpret_cast<__m128i*>(idx), index.v); return _mm_set_ps(table[idx[3]], table[idx[2]], table[idx[1]], table[idx[0]]); } DISPENSO_INLINE SseInt32 int_div_by_3(SseInt32 i) { // Multiply each lane by 0x55555556 and take the high 32 bits. // Process even and odd lanes separately since _mm_mul_epu32 only uses lanes 0,2. __m128i multiplier = _mm_set1_epi32(0x55555556); // Even lanes (0, 2). __m128i even = _mm_srli_epi64(_mm_mul_epu32(i.v, multiplier), 32); // Odd lanes (1, 3): shift right by 32 bits to put odd lanes in even positions. __m128i i_odd = _mm_srli_epi64(i.v, 32); __m128i odd = _mm_srli_epi64(_mm_mul_epu32(i_odd, multiplier), 32); odd = _mm_slli_epi64(odd, 32); // Blend even and odd results. return _mm_blend_epi16(even, odd, 0xCC); // 0xCC = 11001100b selects odd from odd } // nonnormal/nonnormalOrZero: return SseInt32 masks for SSE types. DISPENSO_INLINE SseInt32 nonnormal(SseInt32 i) { return (i & 0x7f800000) == 0x7f800000; } DISPENSO_INLINE SseInt32 nonnormalOrZero(SseInt32 i) { auto m = i & 0x7f800000; return (m == 0x7f800000) | (m == 0); } DISPENSO_INLINE SseInt32 nonnormal(SseFloat f) { return nonnormal(bit_cast(f)); } // any_true: reduce SIMD mask to scalar bool (true if any lane is set). DISPENSO_INLINE bool any_true(SseInt32 mask) { return _mm_movemask_ps(_mm_castsi128_ps(mask.v)) != 0; } DISPENSO_INLINE SseFloat signof(SseFloat x) { SseUint32 xi = bit_cast(x); return bit_cast((xi & 0x80000000u) | FloatTraits::kOne); } DISPENSO_INLINE SseInt32 signofi(SseInt32 i) { return SseInt32(1) - (SseInt32(2) & (i < SseInt32(0))); } // nbool_as_one: 0 if mask is true (all-ones), 1 if false (all-zeros). 
template <> DISPENSO_INLINE SseFloat nbool_as_one(SseFloat b) { // ~mask & 1.0f bits return bit_cast( SseInt32(_mm_andnot_si128(_mm_castps_si128(b.v), _mm_set1_epi32(0x3f800000)))); } template <> DISPENSO_INLINE SseInt32 nbool_as_one(SseFloat b) { return _mm_andnot_si128(_mm_castps_si128(b.v), _mm_set1_epi32(1)); } template <> DISPENSO_INLINE SseInt32 nbool_as_one(SseInt32 b) { return _mm_andnot_si128(b.v, _mm_set1_epi32(1)); } // bool_as_one: 1 if mask is true, 0 if false. template <> DISPENSO_INLINE SseFloat bool_as_one(SseFloat b) { return bit_cast( SseInt32(_mm_and_si128(_mm_castps_si128(b.v), _mm_set1_epi32(0x3f800000)))); } template <> DISPENSO_INLINE SseInt32 bool_as_one(SseFloat b) { return _mm_and_si128(_mm_castps_si128(b.v), _mm_set1_epi32(1)); } // bool_as_mask: for SIMD masks, identity (already a mask). template <> DISPENSO_INLINE SseInt32 bool_as_mask(SseFloat b) { return _mm_castps_si128(b.v); } template <> DISPENSO_INLINE SseInt32 bool_as_mask(SseInt32 b) { return b; } template <> DISPENSO_INLINE SseInt32 bool_as_mask(SseUint32 b) { return b.v; } template <> DISPENSO_INLINE SseUint32 bool_as_mask(SseFloat b) { return _mm_castps_si128(b.v); } template <> DISPENSO_INLINE SseUint32 bool_as_mask(SseUint32 b) { return b; } template <> DISPENSO_INLINE SseUint32 bool_as_mask(SseInt32 b) { return b.v; } } // namespace fast_math } // namespace dispenso #endif // defined(__SSE4_1__) ================================================ FILE: dispenso/fast_math/simd.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ // Convenience header that provides DefaultSimdFloat — the widest available // SIMD float type for the current platform. Include this header and use // DefaultSimdFloat in generic code that should auto-vectorize to the best // available SIMD width. // // Example: // #include // using F = dispenso::fast_math::DefaultSimdFloat; // F result = dispenso::fast_math::sin(F(1.0f)); #pragma once // DefaultSimdFloat is defined in util.h (after SIMD backend includes). // This header just pulls in fast_math.h which includes util.h. #include ================================================ FILE: dispenso/fast_math/util.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #include #include #if defined(__SSE__) || defined(__AVX__) #include #endif // __SSE__ #include #include #include "float_traits.h" namespace dispenso { namespace fast_math { // Reinterpret the bits of `src` as type `To`. Equivalent to std::bit_cast (C++20). template DISPENSO_INLINE std::enable_if_t< sizeof(To) == sizeof(From) && std::is_trivially_copyable_v && std::is_trivially_copyable_v, To> bit_cast(const From& src) noexcept { static_assert( std::is_default_constructible_v, "This implementation additionally requires " "destination type to be default constructible"); To dst; memcpy(&dst, &src, sizeof(To)); return dst; } // Return the ULP distance between two floats. Denormals are treated as zero. // Handles mixed-sign comparisons correctly by mapping IEEE 754 bit patterns // to a linear integer representation where negative floats map to negative integers. 
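// Worked examples (illustrative): float_distance(1.0f, 1.0f + FLT_EPSILON) == 1,
// since 1.0f + FLT_EPSILON is the next representable float above 1.0f, and
// float_distance(-0.0f, +0.0f) == 0, because both signed zeros (like all denormals)
// map to the same point on the linear integer scale.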
DISPENSO_INLINE uint32_t float_distance(float a, float b) { uint32_t ai = bit_cast(a); uint32_t bi = bit_cast(b); // Handle denormal values as zero ai = ((ai & 0x7f800000) == 0) ? 0 : ai; bi = ((bi & 0x7f800000) == 0) ? 0 : bi; // Map to linear integer space: positive floats stay as-is, // negative floats map to negative integers (sign-magnitude → two's complement). auto toLinear = [](uint32_t bits) -> int32_t { return (bits & 0x80000000u) ? static_cast(0x80000000u - bits) : static_cast(bits); }; int32_t la = toLinear(ai); int32_t lb = toLinear(bi); int32_t diff = la - lb; return static_cast(diff < 0 ? -diff : diff); } // Absolute value: clears the sign bit. Works for all float/SIMD types. template DISPENSO_INLINE Flt fabs(Flt x) { if constexpr (!std::is_same_v>) { return fabs(SimdType_t(x)).v; } else { using Uint = UintType_t; return bit_cast(bit_cast(x) & Uint(0x7fffffff)); } } // Return +1.0 or -1.0 matching the sign bit of x. +0 gives +1, -0 gives -1. template DISPENSO_INLINE Flt signof(Flt x) { if constexpr (!std::is_same_v>) { return signof(SimdType_t(x)).v; } else { using Uint = UintType_t; return bit_cast((bit_cast(x) & 0x80000000) | FloatTraits::kOne); } } // Integer sign: returns +1 for i >= 0, -1 for i < 0. Scalar version (kBoolIsMask=false). template DISPENSO_INLINE std::enable_if_t::kBoolIsMask, IntType_t> signofi( IntType_t i) { return 1 - 2 * (i < 0); } // Integer sign: returns +1 for i >= 0, -1 for i < 0. SIMD mask version (kBoolIsMask=true). template DISPENSO_INLINE std::enable_if_t::kBoolIsMask, IntType_t> signofi( IntType_t i) { // Explicit IntType_t cast handles AVX-512 where (i < 0) returns Avx512Mask, not Avx512Int32. auto neg = IntType_t(i < 0); return 1 - (2 & neg); } // True if the float (as int bits) has all exponent bits set (inf or NaN). // Returns bool for scalar types, SIMD lane mask for SIMD types. template DISPENSO_INLINE auto nonnormal(IntType_t i) { return (i & 0x7f800000) == 0x7f800000; } // True if the float (as int bits) is inf, NaN, or zero. // Returns bool for scalar types, SIMD lane mask for SIMD types. template DISPENSO_INLINE auto nonnormalOrZero(IntType_t i) { auto m = i & 0x7f800000; return (m == 0x7f800000) | (m == 0); } template DISPENSO_INLINE auto nonnormal(Flt f) { return nonnormal(bit_cast>(f)); } // Truncate float toward zero, returning int. No inf/NaN guard — caller must // ensure inputs are finite (e.g. after range reduction by a finite constant). // Maps to cvttps2epi32 (SSE), vcvtq_s32_f32 (NEON), or (int)f (scalar). template DISPENSO_INLINE IntType_t convert_to_int_trunc(Flt f) { if constexpr (!std::is_same_v>) { return convert_to_int_trunc(SimdType_t(f)).v; } else { return static_cast>(f); } } // Truncate float toward zero, returning int. Inf/NaN lanes are zeroed. // Use when the input may contain non-finite values. template DISPENSO_INLINE IntType_t convert_to_int_trunc_safe(Flt f) { if constexpr (!std::is_same_v>) { return convert_to_int_trunc_safe(SimdType_t(f)).v; } else { auto fi = bit_cast>(f); if ((fi & 0x7f800000) == 0x7f800000) { return 0; } return static_cast>(f); } } // Convert float to int using round-to-nearest-even (SSE cvtss2si semantics). // For non-normal inputs (inf/NaN), returns 0 to avoid undefined behavior. template DISPENSO_INLINE IntType_t convert_to_int(Flt f) { if constexpr (!std::is_same_v>) { return convert_to_int(SimdType_t(f)).v; } else { #if defined(__SSE__) return _mm_cvtss_si32(_mm_set_ss(f)); #else // Round to nearest even via magic number addition, guard non-normals → 0. 
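// Worked example (illustrative): kMagic = 1.5f * 2^23 = 12582912.0f. Adding it moves
// f into [2^23, 2^24), where the float spacing is exactly 1, so the hardware's
// round-to-nearest-even performs the rounding: for f = 2.5f the exact sum 12582914.5
// is not representable and rounds to 12582914.0f (ties-to-even); subtracting kMagic
// leaves 2.0f, so convert_to_int(2.5f) == 2.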
auto fi = bit_cast>(f); if ((fi & 0x7f800000) == 0x7f800000) { return 0; } constexpr float kMagic = FloatTraits::kMagic; // 1.5f * 2^23 return static_cast>((f + kMagic) - kMagic); #endif // __SSE__ } } // Convert float to int with round-to-nearest-even, clamping to [kMin, kMax]. // For non-normal inputs (inf/NaN), returns 0 to avoid undefined behavior. template kMin, IntType_t kMax> DISPENSO_INLINE IntType_t convert_to_int_clamped(Flt f) { #if defined(__SSE4_1__) static const __m128i kMn = _mm_set1_epi32(kMin); static const __m128i kMx = _mm_set1_epi32(kMax); __m128i i = _mm_cvtps_epi32(_mm_set_ss(f)); i = _mm_min_epi32(i, kMx); i = _mm_max_epi32(i, kMn); return _mm_cvtsi128_si32(i); #else // Round to nearest even via magic number addition (same technique as rangeReduce), // then clamp. Guard against non-normal inputs (inf/NaN) → return 0. auto fi = bit_cast>(f); if ((fi & 0x7f800000) == 0x7f800000) { return 0; } constexpr float kMagic = FloatTraits::kMagic; // 1.5f * 2^23 auto rounded = static_cast>((f + kMagic) - kMagic); if (rounded > kMax) return kMax; if (rounded < kMin) return kMin; return rounded; #endif // __SSE4_1__ } // Floor for values within integer range. Uses SSE4.1 roundss when available. template DISPENSO_INLINE Flt floor_small(Flt x) { if constexpr (!std::is_same_v>) { return floor_small(SimdType_t(x)).v; } else { IntType_t i = x; Flt xi = i; if constexpr (FloatTraits::kBoolIsMask) { return i - ((x < xi) & 1); } else { return i - (x < xi) * 1; } } } template <> DISPENSO_INLINE float floor_small(float x) { #if defined(__SSE4_1__) __m128 f = _mm_set_ss(x); __m128 r = _mm_floor_ss(f, f); return _mm_cvtss_f32(r); #else return std::floor(x); #endif // __SSE4_1__ } // Minimum of x and mn. If x is NaN and mn is not, returns mn (relied-upon NaN behavior). template DISPENSO_INLINE Flt min(Flt x, Flt mn); template <> DISPENSO_INLINE float min(float x, float mn) { #if defined(__SSE4_1__) __m128 f = _mm_set_ss(x); __m128 fmn = _mm_set_ss(mn); // Ordering matters here for NaN behavior __m128 r = _mm_min_ss(f, fmn); return _mm_cvtss_f32(r); #else return x < mn ? x : mn; #endif //__SSE4_1__ } // Clamp x to [mn, mx]. If x is NaN, NaN propagates (result is NaN). // Use when NaN should signal an error rather than be silently clamped. template DISPENSO_INLINE Flt clamp_allow_nan(Flt x, Flt mn, Flt mx); template <> DISPENSO_INLINE float clamp_allow_nan(float x, float mn, float mx) { #if defined(__SSE4_1__) __m128 f = _mm_set_ss(x); __m128 fmn = _mm_set_ss(mn); __m128 fmx = _mm_set_ss(mx); // Ordering matters here for NaN behavior __m128 r = _mm_max_ss(fmn, _mm_min_ss(fmx, f)); return _mm_cvtss_f32(r); #else mx = (mx < x) ? mx : x; return (mx < mn) ? mn : mx; #endif // __SSE4_1__ } // Clamp x to [mn, mx]. If x is NaN, returns a value in [mn, mx] (NaN is suppressed). // Use when a valid output is required regardless of input. template DISPENSO_INLINE Flt clamp_no_nan(Flt x, Flt mn, Flt mx); template <> DISPENSO_INLINE float clamp_no_nan(float x, float mn, float mx) { #if defined(__SSE4_1__) __m128 f = _mm_set_ss(x); __m128 fmn = _mm_set_ss(mn); __m128 fmx = _mm_set_ss(mx); // Ordering matters here for NaN behavior __m128 r = _mm_max_ss(fmn, _mm_min_ss(f, fmx)); return _mm_cvtss_f32(r); // TODO: See vrndmq_f32 for ARM NEON #else mx = (mx > x) ? x : mx; return (mx > mn) ? mx : mn; #endif // __SSE4_1__ } // Load table[index]. Scalar version is a plain array access; SIMD versions use gather instructions. 
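// Illustrative sketch (hypothetical table name): gather pairs naturally with the
// integer conversions above for table-driven approximations, e.g.
//   auto idx = convert_to_int_trunc(clamp_no_nan(x, Flt(0.f), Flt(255.f)));
//   Flt y = gather<Flt>(lut, idx);  // lut: a caller-provided float[256]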
template DISPENSO_INLINE Flt gather(const float* table, IntType_t index); template <> DISPENSO_INLINE float gather(const float* table, int32_t index) { return table[index]; } // Return 0 if b is true, 1 if b is false (negated bool as numeric). template DISPENSO_INLINE T nbool_as_one(BoolT b); template <> DISPENSO_INLINE float nbool_as_one(bool b) { return b ? 0.0f : 1.0f; } template <> DISPENSO_INLINE int32_t nbool_as_one(bool b) { return static_cast(!b); } // Return 1 if b is true, 0 if b is false (bool as numeric). template DISPENSO_INLINE T bool_as_one(BoolT b); template <> DISPENSO_INLINE float bool_as_one(bool b) { return b ? 1.0f : 0.0f; } template <> DISPENSO_INLINE int32_t bool_as_one(bool b) { return static_cast(b); } // Convert bool to bitmask: true → all-ones (0xFFFFFFFF), false → 0x0. // For SIMD types, this converts a lane mask to an integer mask for bitwise ops. template DISPENSO_INLINE T bool_as_mask(BoolT b) { T mask = b; return ~(mask - 1); } // Return val if b is true, 0 otherwise. Uses bitwise AND via bool_as_mask. template DISPENSO_INLINE T bool_apply_or_zero(BoolT b, T val) { return bit_cast(bool_as_mask>(b) & bit_cast>(val)); } // True if x is an integer. Safe for all float values including large, inf, NaN. // All floats with |x| >= 2^23 are integers (mantissa has no fractional bits). // For |x| < 2^23, floor_small(x) is valid (no int overflow). template DISPENSO_INLINE BoolType_t float_is_int(Flt x) { auto ax = fabs(x); return (ax >= Flt(8388608.0f)) | (floor_small(x) == x); } // True if x is an odd integer. // For |x| >= 2^24: ULP >= 2, so only even integers are representable → always false. template DISPENSO_INLINE BoolType_t float_is_odd(Flt x) { if constexpr (std::is_same_v) { return float_is_int(x) && !float_is_int(x * 0.5f); } else { return float_is_int(x) & !float_is_int(x * Flt(0.5f)); } } // Fast integer division by 3 using multiply-and-shift. Used in cbrt magic constant computation. DISPENSO_INLINE int32_t int_div_by_3(int32_t i) { return static_cast((uint64_t(i) * 0x55555556) >> 32); } // --------------------------------------------------------------------------- // Polynomial evaluation: hornerEval, estrinEval, polyEval // --------------------------------------------------------------------------- // // All take coefficients in HIGH-to-LOW order (highest degree first): // polyEval(x, cn, cn-1, ..., c1, c0) = cn*x^n + cn-1*x^(n-1) + ... + c1*x + c0 // // This matches the natural Horner evaluation order and the existing hand-written // FMA chains in fast_math. For odd/even polynomial forms like tanh(x) = x*(1+x²*Q(x²)), // compose as: x * (1 + x2 * polyEval(x2, qn, ..., q1, q0)). namespace detail { // --- Horner evaluation --- // Sequential FMA chain, optimal for low-ILP targets (GPU, scalar, narrow SIMD). // Degree N polynomial = N FMAs. template DISPENSO_INLINE Flt hornerImpl(Flt, Flt accum) { return accum; } template DISPENSO_INLINE Flt hornerImpl(Flt x, Flt accum, Flt next, Cs... rest) { return hornerImpl(x, FloatTraits::fma(accum, x, next), rest...); } // --- Estrin evaluation --- // Tree-reduces paired coefficients at each level, cutting critical-path depth // from N to ceil(log2(N+1)) at the cost of extra multiplies for x powers. // Better for wide SIMD with high ILP (AVX, AVX-512). // // Generic peel-and-recurse via tuples. At each level: // 1. Pair adjacent values: (a,b) → fma(a, xp, b) // 2. Carry unpaired odd element // 3. 
Recurse with xp² and the paired results // // All tuple operations (make_tuple, tuple_cat, apply) are eliminated by the // optimizer — verified to produce identical assembly to hand-written FMA trees // with clang 21, GCC 11, and GCC 15 at -O2. template struct EstrinImpl { // Base cases DISPENSO_INLINE static Flt reduce(Flt, std::tuple done) { return std::get<0>(done); } DISPENSO_INLINE static Flt reduce(Flt xp, std::tuple done) { return FloatTraits::fma(std::get<0>(done), xp, std::get<1>(done)); } // General: unpack tuple, pair all elements at this level, recurse template DISPENSO_INLINE static Flt reduce(Flt xp, std::tuple done) { return std::apply([xp](auto... ds) { return pairLevel(xp, std::tuple<>{}, ds...); }, done); } // Done pairing at this level — recurse with xp² template DISPENSO_INLINE static Flt pairLevel(Flt xp, std::tuple paired) { return reduce(xp * xp, paired); } // Odd leftover — carry forward unpaired template DISPENSO_INLINE static Flt pairLevel(Flt xp, std::tuple paired, Flt a) { return reduce(xp * xp, std::tuple_cat(paired, std::make_tuple(a))); } // Peel two from front, pair via FMA, accumulate into paired tuple template DISPENSO_INLINE static Flt pairLevel(Flt xp, std::tuple paired, Flt a, Flt b, Rest... rest) { return pairLevel( xp, std::tuple_cat(paired, std::make_tuple(FloatTraits::fma(a, xp, b))), rest...); } }; } // namespace detail // Horner evaluation: hornerEval(x, cn, cn-1, ..., c0) = ((cn*x + cn-1)*x + ...)*x + c0 // Coefficients HIGH-to-LOW (highest degree first). // Flt is deduced from x; coefficients are converted to Flt internally. template DISPENSO_INLINE Flt hornerEval(Flt x, C0 cn, Cs... rest) { return detail::hornerImpl(x, Flt(cn), Flt(rest)...); } // Estrin evaluation: same semantics as hornerEval, but uses tree-reduction // for lower critical-path depth (ceil(log2(N)) vs N dependent FMAs). // Coefficients HIGH-to-LOW. template DISPENSO_INLINE Flt estrinEval(Flt x, C0 cn, Cs... rest) { return detail::EstrinImpl::reduce(x, std::make_tuple(Flt(cn), Flt(rest)...)); } // Platform-adaptive polynomial evaluation. // Uses Estrin on CPU (tree reduction for ILP on wide SIMD), Horner on GPU // (sequential FMA chain, no wasted registers on in-order pipelines). // Use polyEval only at call sites where Estrin has been measured beneficial. // Use hornerEval directly for accuracy-sensitive polynomials where Estrin's // different rounding order would regress ULP. // Coefficients HIGH-to-LOW: polyEval(x, cn, ..., c0). template DISPENSO_INLINE Flt polyEval(Flt x, C0 cn, Cs... rest) { #if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) return hornerEval(x, cn, rest...); #else return estrinEval(x, cn, rest...); #endif } } // namespace fast_math } // namespace dispenso // Auto-detect SIMD backends and include FloatTraits specializations. #if defined(__SSE4_1__) #include #endif #if defined(__AVX2__) #include #endif #if defined(__AVX512F__) #include #endif #if defined(__aarch64__) #include #endif #if __has_include("hwy/highway.h") #include #endif namespace dispenso { namespace fast_math { // Best available SIMD float type for the current platform. // Prefer native intrinsic wrappers over Highway for lower overhead. // Highway is a fallback for platforms without a native wrapper. 
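// Illustrative usage (values assumed): generic code written against DefaultSimdFloat
// picks up the widest backend available at compile time, e.g.
//   using F = dispenso::fast_math::DefaultSimdFloat;
//   F y = polyEval(F(x), 1.f / 6.f, 0.5f, 1.f, 1.f);  // x^3/6 + x^2/2 + x + 1
// evaluates the cubic with Estrin's scheme on CPU builds and Horner on device builds.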
#if defined(__aarch64__) using DefaultSimdFloat = NeonFloat; #elif defined(__AVX512F__) using DefaultSimdFloat = Avx512Float; #elif defined(__AVX2__) using DefaultSimdFloat = AvxFloat; #elif defined(__SSE4_1__) using DefaultSimdFloat = SseFloat; #elif __has_include("hwy/highway.h") using DefaultSimdFloat = HwyFloat; #else using DefaultSimdFloat = float; #endif } // namespace fast_math } // namespace dispenso ================================================ FILE: dispenso/for_each.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file for_each.h * @ingroup group_parallel * Functions for performing parallel for_each over iterables. This intends to more-or-less * mimic std::for_each, with a possible (Concurrent)TaskSet passed in for external wait capability, * and ForEachOptions for controlling the wait behavior and limiting of parallelism. **/ #pragma once #include #include #include #include #include namespace dispenso { #if DISPENSO_HAS_CONCEPTS /** * @concept ForEachFunc * @brief A callable suitable for for_each operations. * * The callable must be invocable with a reference to the iterator's value type. **/ template concept ForEachFunc = std::invocable())>; #endif // DISPENSO_HAS_CONCEPTS /** * A set of options to control for_each **/ struct ForEachOptions { /** * The maximum number of threads to use. This can be used to limit the number of threads below * the number associated with the TaskSet's thread pool to control the degree of concurrency. * Setting maxThreads to zero or one will result in serial operation. **/ uint32_t maxThreads = std::numeric_limits::max(); /** * Specify whether the return of the for_each signifies the work is complete. If the * for_each is initiated without providing a TaskSet, the for_each will always wait. * * @note If wait is true, the calling thread will always participate in computation. If this is * not desired, pass wait as false, and wait manually outside of the for_each on the passed * TaskSet. **/ bool wait = true; }; namespace detail { // Random-access iterators: compute chunk boundaries arithmetically, avoiding SmallVector. template void for_each_n_schedule( TaskSetT& tasks, Iter start, F&& f, ssize_t numThreads, size_t chunkSize, ssize_t transitionIdx, size_t smallChunkSize, const ForEachOptions& options, std::random_access_iterator_tag) { ssize_t numToSchedule = options.wait ? 
numThreads - 1 : numThreads; if (numToSchedule > 0) { tasks.scheduleBulk( static_cast(numToSchedule), [start, &f, chunkSize, smallChunkSize, transitionIdx](size_t idx) { ssize_t sidx = static_cast(idx); ssize_t offset; ssize_t thisChunkSize; if (sidx < transitionIdx) { offset = sidx * static_cast(chunkSize); thisChunkSize = static_cast(chunkSize); } else { offset = transitionIdx * static_cast(chunkSize) + (sidx - transitionIdx) * static_cast(smallChunkSize); thisChunkSize = static_cast(smallChunkSize); } Iter s = start + offset; Iter e = s + thisChunkSize; return [s, e, f]() { auto recurseInfo = PerPoolPerThreadInfo::parForRecurse(); for (Iter it = s; it != e; ++it) { f(*it); } }; }); } if (options.wait) { ssize_t lastIdx = numThreads - 1; ssize_t offset; ssize_t thisChunkSize; if (lastIdx < transitionIdx) { offset = lastIdx * static_cast(chunkSize); thisChunkSize = static_cast(chunkSize); } else { offset = transitionIdx * static_cast(chunkSize) + (lastIdx - transitionIdx) * static_cast(smallChunkSize); thisChunkSize = static_cast(smallChunkSize); } Iter lastStart = start + offset; Iter lastEnd = lastStart + thisChunkSize; for (Iter it = lastStart; it != lastEnd; ++it) { f(*it); } tasks.wait(); } } // Non-random-access iterators: pre-compute boundary iterators into SmallVector. template void for_each_n_schedule( TaskSetT& tasks, Iter start, F&& f, ssize_t numThreads, size_t chunkSize, ssize_t transitionIdx, size_t smallChunkSize, const ForEachOptions& options, IterCategory) { SmallVector boundaries; boundaries.reserve(static_cast(numThreads) + 1); boundaries.push_back(start); for (ssize_t t = 0; t < numThreads; ++t) { size_t cs = (t < transitionIdx) ? chunkSize : smallChunkSize; Iter next = boundaries[t]; std::advance(next, static_cast(cs)); boundaries.push_back(next); } ssize_t numToSchedule = options.wait ? numThreads - 1 : numThreads; if (numToSchedule > 0) { tasks.scheduleBulk(static_cast(numToSchedule), [&boundaries, &f](size_t idx) { return [s = boundaries[idx], e = boundaries[idx + 1], f]() { auto recurseInfo = PerPoolPerThreadInfo::parForRecurse(); for (Iter it = s; it != e; ++it) { f(*it); } }; }); } if (options.wait) { for (Iter it = boundaries[numThreads - 1]; it != boundaries[numThreads]; ++it) { f(*it); } tasks.wait(); } } } // namespace detail /** * A function like std::for_each_n, but where the function is invoked in parallel across the passed * range. * * @param tasks The task set to schedule the for_each on. * @param start The iterator for the start of the range. * @param n The length of the range. * @param f The function to execute in parallel. This is a unary function that must be capable of * taking dereference of Iter. * @param options See ForEachOptions for details. **/ template DISPENSO_REQUIRES(ForEachFunc) void for_each_n(TaskSetT& tasks, Iter start, size_t n, F&& f, ForEachOptions options = {}) { // TODO(bbudge): With options.maxThreads, we might want to allow a small fanout factor in // recursive case? if (!n || !options.maxThreads || detail::PerPoolPerThreadInfo::isParForRecursive(&tasks.pool())) { for (size_t i = 0; i < n; ++i) { f(*start); ++start; } if (options.wait) { tasks.wait(); } return; } // 0 indicates serial execution per API spec int32_t maxThreads = std::max(options.maxThreads, 1); ssize_t numThreads = std::min(tasks.numPoolThreads() + options.wait, maxThreads); // Reduce threads used if they exceed work to be done. 
numThreads = std::min(numThreads, n); auto chunking = detail::staticChunkSize(n, numThreads); size_t chunkSize = chunking.ceilChunkSize; bool perfectlyChunked = chunking.transitionTaskIndex == numThreads; ssize_t transitionIdx = chunking.transitionTaskIndex; size_t smallChunkSize = chunkSize - !perfectlyChunked; detail::for_each_n_schedule( tasks, start, std::forward(f), numThreads, chunkSize, transitionIdx, smallChunkSize, options, typename std::iterator_traits::iterator_category{}); } /** * A function like std::for_each_n, but where the function is invoked in parallel across the passed * range. * * @param start The iterator for the start of the range. * @param n The length of the range. * @param f The function to execute in parallel. This is a unary function that must be capable of * taking dereference of Iter. * @param options See ForEachOptions for details; however it should be noted that this function must * always wait, and therefore options.wait is ignored. **/ template DISPENSO_REQUIRES(ForEachFunc) void for_each_n(Iter start, size_t n, F&& f, ForEachOptions options = {}) { TaskSet taskSet(globalThreadPool()); options.wait = true; for_each_n(taskSet, start, n, std::forward(f), options); } /** * A function like std::for_each, but where the function is invoked in parallel across the passed * range. * * @param tasks The task set to schedule the for_each on. * @param start The iterator for the start of the range. * @param end The iterator for the end of the range. * @param f The function to execute in parallel. This is a unary function that must be capable of * taking dereference of Iter. * @param options See ForEachOptions for details. **/ template DISPENSO_REQUIRES(ForEachFunc) void for_each(TaskSetT& tasks, Iter start, Iter end, F&& f, ForEachOptions options = {}) { for_each_n(tasks, start, std::distance(start, end), std::forward(f), options); } /** * A function like std::for_each, but where the function is invoked in parallel across the passed * range. * * @param start The iterator for the start of the range. * @param end The iterator for the end of the range. * @param f The function to execute in parallel. This is a unary function that must be capable of * taking dereference of Iter. * @param options See ForEachOptions for details; however it should be noted that this function must * always wait, and therefore options.wait is ignored. **/ template DISPENSO_REQUIRES(ForEachFunc) void for_each(Iter start, Iter end, F&& f, ForEachOptions options = {}) { for_each_n(start, std::distance(start, end), std::forward(f), options); } // TODO(bbudge): Implement ranges versions for these in C++20 (currently not in an env where this // can be tested) } // namespace dispenso ================================================ FILE: dispenso/future.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file future.h * @ingroup group_async * A file providing Futures, as well as functionality for creating futures, and for how to invoke * Futures' closures. * * See https://en.cppreference.com/w/cpp/experimental/future for details on the API. **/ #pragma once #include #include #include #include #include namespace dispenso { // See https://en.cppreference.com/w/cpp/experimental/future for details on the API. // TODO(bbudge): Implement when_any(), ?unwrapping constructor? functionality. 
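// Illustrative usage sketch (an addition to this header's commentary, not original code):
// dispenso::async runs a callable via the global dispenso thread pool and returns a Future;
// then() chains a continuation, and get() blocks for the result, work-stealing while it waits.
//
// ~~~
// dispenso::Future<int> fut = dispenso::async([]() { return 6 * 7; });
// dispenso::Future<int> doubled =
//     fut.then([](dispenso::Future<int>&& f) { return f.get() * 2; });
// int result = doubled.get(); // 84
// ~~~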
/** * A std::launch policy specifying we won't force asynchronicity. Opposite of * std::launch::async **/ constexpr std::launch kNotAsync = static_cast(0); /** * A std::launch policy specifying we won't allow Future::wait_for and * Future::wait_util to invoke the Future's underlying functor. Opposite of * std::launch::deferred **/ constexpr std::launch kNotDeferred = static_cast(0); /** * A class that implements a hybrid of the interfaces for std::experimental::future, and * std::experimental::shared_future. Future acts like a shared_future, * but includes the share function in order to enable generic code that may use the * function. See https://en.cppreference.com/w/cpp/experimental/future for more details on the API, * but some differences are notable. * * 1. dispenso::Future are created and set into executors through their constructor, or * through dispenso::async * 2. dispenso::future can be shared around like * std::experimental::shared_future, and wait/get functions may be called * concurrently from multiple threads. * * Future is thread-safe to call into, with the usual caveats (one thread may not be calling * anything on the Future while another thread assigns to it or destructs it). It is thread-safe to * assign or destruct on one copy of a Future while making calls on another copy of a Future * with the same backing state. * * Because Future is designed to work well with the dispenso TaskSet and ThreadPool, Future * aggressively work steals in get and wait functions. This prevents * deadlock due to thread pool resource starvation, similar to how TaskSets avoid that flavor of * deadlock. It is important to note that deadlock is still possible due to traditional cyclic * dependency, e.g. Future A and Future B which wait on each other. Just as with a cyclic mutex * locking requirement, cyclic Future waits are considered programmer error. **/ template class Future : detail::FutureBase { using Base = detail::FutureBase; public: /** * Construct an invalid Future. **/ Future() noexcept : Base() {} /** * Move constructor * * @param f The future to move from. **/ Future(Future&& f) noexcept : Base(std::move(f)) {} /** @copydoc Future(Future&&) */ Future(Base&& f) noexcept : Base(std::move(f)) {} /** * Copy construct a Future. * * @param f The existing future to reference. This essentially increments the reference count on * the future's backing state. **/ Future(const Future& f) noexcept : Base(f) {} /** @copydoc Future(const Future&) */ Future(const Base& f) noexcept : Base(f) {} /** * Construct a Future with callable and a schedulable (e.g. a ThreadPool or TaskSet), and ensure * it is scheduled according to the launch policy. * * @param f a functor with signature Result(void) to be invoked in order for * get to return a valid value. * @param schedulable A TaskSet, ThreadPool, or similar type that has * function schedule that takes a functor with signature void() and * ForceQueuingTag. * @param asyncPolicy If std::launch::async, the functor will be scheduled through to * the underlying thread pool work queue. * @param deferredPolicy If std::launch::deferred, wait_for and * wait_until may invoke the functor from their calling thread. **/ template Future( F&& f, Schedulable& schedulable, std::launch asyncPolicy = kNotAsync, std::launch deferredPolicy = std::launch::deferred) : Base(std::forward(f), schedulable, asyncPolicy, deferredPolicy) {} /** * Move a Future * * @param f The Future whose backing state will be transferred to this Future. 
**/ Future& operator=(Future&& f) noexcept { Base::move(reinterpret_cast(f)); return *this; } /** * Copy a Future, which increments the underlying state reference count. * * @param f The Future whose backing state will be referenced by this Future. **/ Future& operator=(const Future& f) { Base::copy(f); return *this; } /** * Destruct a Future. This decrements the shared reference count (if any), and ensures the * backing state is destroyed when no more references exist to the backing state. **/ ~Future() = default; /** * Is this Future valid? * * @return true if the Future was constructed with a functor/schedulable, with result * value, or indirectly from a Future that was constructed one of those ways. false * if the Future was constructed via the default constructor, or indrectly from a Future that was * constructed that way. **/ bool valid() const noexcept { return Base::valid(); } /** * Is the Future ready? * * @return true if the value associated with this Future has already be computed, and * get can return the value immediately. Returns false if the functor * is logically queued, or is in progress. **/ bool is_ready() const { return Base::is_ready(); } /** * Wait until is_ready is true. * * @note This function will invoke the functor if it is still logically queued. **/ void wait() const { Base::wait(); } /** * Wait until is_ready is true or until timing out. * * @param timeoutDuration The length of time to wait until timing out. This is relative to the * current point in time. * @return std::future_status::ready if get can return immediately, * std::future_status::timeout if the function timed out while waiting. * * @note This function may invoke the functor if it is still logically queued, and * std::launch::deferred was specified at construction. **/ template std::future_status wait_for(const std::chrono::duration& timeoutDuration) const { return Base::wait_for(timeoutDuration); } /** * Wait until is_ready is true or until timing out. * * @param timeoutTime The absolute time to wait until timing out. * @return std::future_status::ready if get can return immediately, * std::future_status::timeout if the function timed out while waiting. * * @note This function may invoke the functor if it is still logically queued, and * std::launch::deferred was specified at construction. **/ template std::future_status wait_until(const std::chrono::time_point& timeoutTime) const { return Base::wait_until(timeoutTime); } /** * Provide a shared future. share is here only to provide compatible api with * std::experimental::future, but Future already works like std::shared_future. **/ Future share() { return std::move(*this); } /** * Get the result of the future functor. This function blocks until the result is ready. * * @return A const reference to the result's value. **/ const Result& get() const { wait(); return this->impl_->result(); } /** * Schedule a functor to be invoked upon reaching is_ready status, and return * a future that will hold the result of the functor. * * @param f The functor to be executed whose result will be available in the returned * Future. This should have signature Unpecified(Future&&). * @param sched The Schedulable in which to run the functor. * @param asyncPolicy if std::launch::async then the functor will attempt to be * queued on the backing work queue of the Schedulable (if any). * @param deferredPolicy if std::launch::deferred, wait_for and * wait_until of the returned future will be allowed to invoke the functor, otherwise * not. 
* * @return A future containing the result of the functor. **/ template Future&&>> then( F&& f, Schedulable& sched, std::launch asyncPolicy = kNotAsync, std::launch deferredPolicy = std::launch::deferred) { Future&&>> retFuture; retFuture.impl_ = this->template thenImpl&&>>( std::forward(f), sched, asyncPolicy, deferredPolicy); return retFuture; } template Future&&>> then(F&& f) { return then(std::forward(f), globalThreadPool(), kNotAsync, std::launch::deferred); } private: template Future(T&& t, detail::ReadyTag) { this->impl_ = detail::createValueFutureImplReady(std::forward(t)); } template friend Future> make_ready_future(T&& t); template friend class Future; }; /** * @copydoc Future */ template class Future : detail::FutureBase { using Base = detail::FutureBase; public: /** Default constructor. */ Future() noexcept : Base() {} /** Move constructor. */ Future(Future&& f) noexcept : Base(std::move(f)) {} /** @copydoc Future(Future&&) */ Future(Base&& f) noexcept : Base(std::move(f)) {} /** Copy constructor. */ Future(const Future& f) noexcept : Base(f) {} /** @copydoc Future(const Future&) */ Future(const Base& f) noexcept : Base(f) {} /** Construct with callable and schedulable. @see Future::Future(F&&, Schedulable&, * std::launch, std::launch) */ template Future( F&& f, Schedulable& schedulable, std::launch asyncPolicy = kNotAsync, std::launch deferredPolicy = std::launch::deferred) : Base(std::forward(f), schedulable, asyncPolicy, deferredPolicy) {} Future& operator=(Future&& f) noexcept { Base::move(reinterpret_cast(f)); return *this; } Future& operator=(const Future& f) { Base::copy(f); return *this; } ~Future() = default; using Base::is_ready; using Base::valid; using Base::wait; using Base::wait_for; using Base::wait_until; Future share() { return std::move(*this); } /** * Get the result of the future functor. This function blocks until the result is ready. * * @return Access to the underlying reference. **/ Result& get() const { wait(); return this->impl_->result(); } template Future&&>> then( F&& f, Schedulable& sched, std::launch asyncPolicy = kNotAsync, std::launch deferredPolicy = std::launch::deferred) { Future&&>> retFuture; retFuture.impl_ = this->template thenImpl&&>>( std::forward(f), sched, asyncPolicy, deferredPolicy); return retFuture; } template Future&&>> then(F&& f) { return then(std::forward(f), globalThreadPool(), kNotAsync, std::launch::deferred); } private: template Future(std::reference_wrapper t, detail::ReadyTag) { this->impl_ = detail::createRefFutureImplReady(t); } template friend Future make_ready_future(std::reference_wrapper x); template friend class Future; }; /** * @copydoc Future */ template <> class Future : detail::FutureBase { using Base = detail::FutureBase; public: /** Default constructor. */ Future() noexcept : Base() {} /** Move constructor. */ Future(Future&& f) noexcept : Base(std::move(f)) {} /** @copydoc Future(Future&&) */ Future(Base&& f) noexcept : Base(std::move(f)) {} /** Copy constructor. */ Future(const Future& f) noexcept : Base(f) {} /** @copydoc Future(const Future&) */ Future(const Base& f) noexcept : Base(f) {} /** Construct with callable and schedulable. 
@see Future::Future(F&&, Schedulable&, * std::launch, std::launch) */ template DISPENSO_REQUIRES(OnceCallableFunc) Future( F&& f, Schedulable& schedulable, std::launch asyncPolicy = kNotAsync, std::launch deferredPolicy = std::launch::deferred) : Base(std::forward(f), schedulable, asyncPolicy, deferredPolicy) {} Future& operator=(Future&& f) noexcept { Base::move(reinterpret_cast(f)); return *this; } Future& operator=(const Future& f) { Base::copy(f); return *this; } ~Future() = default; using Base::is_ready; using Base::valid; using Base::wait; using Base::wait_for; using Base::wait_until; Future share() { return std::move(*this); } /** * Block until the functor has been called. **/ void get() const { wait(); this->impl_->result(); } template Future&&>> then( F&& f, Schedulable& sched, std::launch asyncPolicy = kNotAsync, std::launch deferredPolicy = std::launch::deferred) { Future&&>> retFuture; retFuture.impl_ = this->template thenImpl&&>>( std::forward(f), sched, asyncPolicy, deferredPolicy); return retFuture; } template Future&&>> then(F&& f) { return then(std::forward(f), globalThreadPool(), kNotAsync, std::launch::deferred); } private: Future(detail::ReadyTag) { impl_ = detail::createVoidFutureImplReady(); } friend Future make_ready_future(); template friend class Future; }; // TODO(bbudge): Determine if we should // a. Expand launch policies, and logically inherit from std::launch and // b. Whether async should truly mean on a new thread. // For now we will treat std::launch::async such that we pass ForceQueuingTag /** * Invoke a functor through the global dispenso thread pool. * * @param policy The bitmask policy for when/how the functor can be invoked. * std::launch::async will result in the functor being forced onto a ThreadPool work * queue. std::launch::deferred specifies that Future::wait_for and * Future::wait_until may invoke the functor. * @param f The functor to be passed, or a function to be executed * @param args The remaining arguments that will be passed to f **/ template inline Future> async(std::launch policy, F&& f, Args&&... args) { return Future>( std::bind(std::forward(f), std::forward(args)...), globalThreadPool(), policy); } /** * Invoke a functor through the global dispenso thread pool. * * @param f The functor to be passed, or a function to be executed * @param args The remaining arguments that will be passed to f **/ template inline Future> async(F&& f, Args&&... args) { return ::dispenso::async(std::launch::deferred, std::forward(f), std::forward(args)...); } /** * Invoke a functor through the specified dispenso thread pool. * * @param pool The ThreadPool to run the Future. * @param policy The bitmask policy for when/how the functor can be invoked. * std::launch::async will result in the functor being forced onto a ThreadPool work * queue. std::launch::deferred specifies that Future::wait_for and * Future::wait_until may invoke the functor. * @param f The functor to be passed, or a function to be executed * @param args The remaining arguments that will be passed to f **/ template inline Future> async(ThreadPool& pool, std::launch policy, F&& f, Args&&... args) { return Future>( std::bind(std::forward(f), std::forward(args)...), pool, policy); } /** * Invoke a functor through the specified dispenso thread pool. * * @param pool The ThreadPool to run the Future. * @param f The functor to be passed, or a function to be executed * @param args The remaining arguments that will be passed to f **/ template inline Future> async(ThreadPool& pool, F&& f, Args&&... 
args) { return ::dispenso::async( pool, std::launch::deferred, std::forward(f), std::forward(args)...); } /** * Invoke a functor through the specified dispenso TaskSet. * * @param tasks The TaskSet to run the Future. * @param policy The bitmask policy for when/how the functor can be invoked. * std::launch::async will result in the functor being forced onto a ThreadPool work * queue. std::launch::deferred specifies that Future::wait_for and * Future::wait_until may invoke the functor. * @param f The functor to be passed, or a function to be executed * @param args The remaining arguments that will be passed to f **/ template inline Future> async(TaskSet& tasks, std::launch policy, F&& f, Args&&... args) { return Future>( std::bind(std::forward(f), std::forward(args)...), tasks, policy); } /** * Invoke a functor through the specified dispenso TaskSet. * * @param tasks The TaskSet to run the Future. * @param f The functor to be passed, or a function to be executed * @param args The remaining arguments that will be passed to f **/ template inline Future> async(TaskSet& tasks, F&& f, Args&&... args) { return ::dispenso::async( tasks, std::launch::deferred, std::forward(f), std::forward(args)...); } /** * Invoke a functor through the specified dispenso ConcurrentTaskSet. * * @param tasks The ConcurrentTaskSet to run the Future. * @param policy The bitmask policy for when/how the functor can be invoked. * std::launch::async will result in the functor being forced onto a ThreadPool work * queue. std::launch::deferred specifies that Future::wait_for and * Future::wait_until may invoke the functor. * @param f The functor to be passed, or a function to be executed * @param args The remaining arguments that will be passed to f **/ template inline Future> async(ConcurrentTaskSet& tasks, std::launch policy, F&& f, Args&&... args) { return Future>( std::bind(std::forward(f), std::forward(args)...), tasks, policy); } /** * Invoke a functor through the specified dispenso ConcurrentTaskSet. * * @param tasks The ConcurrentTaskSet to run the Future. * @param f The functor to be passed, or a function to be executed * @param args The remaining arguments that will be passed to f **/ template inline Future> async(ConcurrentTaskSet& tasks, F&& f, Args&&... args) { return ::dispenso::async( tasks, std::launch::deferred, std::forward(f), std::forward(args)...); } /** * Invoke a functor on a new thread. * * @param sched A NewThreadInvoker * @param policy The bitmask policy for when/how the functor can be invoked. * std::launch::async will result in the functor being forced onto a ThreadPool work * queue. std::launch::deferred specifies that Future::wait_for and * Future::wait_until may invoke the functor. * @param f The functor to be passed, or a function to be executed * @param args The remaining arguments that will be passed to f **/ template inline Future> async(NewThreadInvoker sched, std::launch policy, F&& f, Args&&... args) { return Future>( std::bind(std::forward(f), std::forward(args)...), sched, policy); } /** * Invoke a functor on a new thread. * * @param sched A NewThreadInvoker. * @param f The functor to be passed, or a function to be executed * @param args The remaining arguments that will be passed to f **/ template inline Future> async(NewThreadInvoker sched, F&& f, Args&&... 
args) { return ::dispenso::async( sched, std::launch::deferred, std::forward(f), std::forward(args)...); } /** * Make a Future in a ready state with the value passed into * make_ready_future * * @param t the value to use to create the returned future. **/ template inline Future> make_ready_future(T&& t) { return Future>(std::forward(t), detail::ReadyTag()); } /** * Make a Future in a ready state with the reference_wrapper passed into * make_ready_future * * @param x the wrapped reference to use to create the returned future. **/ template inline Future make_ready_future(std::reference_wrapper x) { return Future(x, detail::ReadyTag()); } /** * Make a Future in a ready state. * **/ inline Future make_ready_future() { return Future(detail::ReadyTag()); } /** * Take a collection of futures, and return a future which will be ready when all input futures are * ready. * * @param first An iterator to the start of the future collection. * @param last An iterator to the end of the future collection. * * @return A Future containing a vector holding copies of the input Futures. The returned Future * will be in ready state when all input Futures are ready. * **/ template Future::value_type>> when_all( InputIt first, InputIt last); /** * Take a specific set of futures, and return a future which will be ready when all input futures *are ready. * * @param futures A parameter pack of futures. * * @return A Future containing a tuple holding copies of the input Futures. The returned Future * will be in ready state when all input Futures are ready. * **/ template auto when_all(Futures&&... futures) -> Future...>>; /** * Take a collection of futures, and return a future which will be ready when all input futures are * ready. * * @param taskSet A task set to register with such that after this call, * taskSet::wait() implies that the resultant future is_ready() * @param first An iterator to the start of the future collection. * @param last An iterator to the end of the future collection. * * @return A Future containing a vector holding copies of the input Futures. The returned Future * will be in ready state when all input Futures are ready. * **/ template Future::value_type>> when_all(TaskSet& taskSet, InputIt first, InputIt last); /** @overload */ template Future::value_type>> when_all(ConcurrentTaskSet& taskSet, InputIt first, InputIt last); /** * Take a specific set of futures, and return a future which will be ready when all input futures *are ready. * * @param taskSet A task set to register with such that after this call, * taskSet::wait() implies that the resultant future is_ready() * @param futures A parameter pack of futures. * * @return A Future containing a tuple holding copies of the input Futures. The returned Future * will be in ready state when all input Futures are ready. * **/ template auto when_all(TaskSet& taskSet, Futures&&... futures) -> Future...>>; /** @overload */ template auto when_all(ConcurrentTaskSet& taskSet, Futures&&... futures) -> Future...>>; } // namespace dispenso #include ================================================ FILE: dispenso/graph.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
*/ #include #include #include namespace { constexpr size_t kToDelete = std::numeric_limits::max(); void set_union( std::vector& s1, const std::vector& s2) { std::vector tmp(s1); s1.clear(); std::set_union(tmp.cbegin(), tmp.cend(), s2.cbegin(), s2.cend(), std::back_inserter(s1)); } void set_insert(std::vector& s, const dispenso::BiPropNode* node) { auto it = std::upper_bound(s.begin(), s.end(), node); if (it == s.begin() || *(it - 1) != node) { s.insert(it, node); } } } // anonymous namespace namespace dispenso { void BiPropNode::biPropDependsOnOneNode(BiPropNode& node) { Node::dependsOnOneNode(node); if (node.biPropSet_ == nullptr && biPropSet_ == nullptr) { biPropSet_ = std::make_shared>(); set_insert(*biPropSet_, this); set_insert(*biPropSet_, &node); node.biPropSet_ = biPropSet_; } else if (node.biPropSet_ != nullptr && biPropSet_ != nullptr) { set_union(*biPropSet_, *node.biPropSet_); node.biPropSet_ = biPropSet_; } else if (biPropSet_ == nullptr) { biPropSet_ = node.biPropSet_; set_insert(*biPropSet_, this); } else { node.biPropSet_ = biPropSet_; set_insert(*biPropSet_, &node); } } template void SubgraphT::clear() { decrementDependentCounters(); const size_t numGraphPredecessors = markNodesWithPredicessors(); if (numGraphPredecessors != 0) { removePredecessorDependencies(numGraphPredecessors); } destroyNodes(); } template void SubgraphT::destroyNodes() { for (NodeType* n : nodes_) { n->~NodeType(); } allocator_->clear(); nodes_.clear(); } template SubgraphT::~SubgraphT() { for (NodeType* n : nodes_) { n->~NodeType(); } } template void SubgraphT::decrementDependentCounters() { for (N* node : nodes_) { for (Node* const dependent : node->dependents_) { dependent->numPredecessors_--; } removeNodeFromBiPropSet(node); } } template size_t SubgraphT::markNodesWithPredicessors() { size_t numGraphPredecessors = 0; for (N* node : nodes_) { if (node->numPredecessors_ != 0) { numGraphPredecessors += node->numPredecessors_; node->numPredecessors_ = kToDelete; } } return numGraphPredecessors; } template void SubgraphT::removePredecessorDependencies(size_t numGraphPredecessors) { for (SubgraphT& subgraph : graph_->subgraphs_) { if (&subgraph == this) { continue; } for (N* node : subgraph.nodes_) { auto& dependents = node->dependents_; size_t num = dependents.size(); for (size_t i = 0; i < num;) { if (dependents[i]->numPredecessors_ == kToDelete) { dependents[i] = dependents[num - 1]; --num; if (--numGraphPredecessors == 0) { dependents.resize(num); return; } } else { i++; } } dependents.resize(num); } } } namespace { constexpr size_t kMaxCache = 8; // Don't cache too-large allocators. This way we will have at most 8*(2**16) = 512K outstanding // nodes worth of memory per node type. // TODO(bbudge): Make these caching values macro configurable for lightweight platforms. 
constexpr size_t kMaxChunkCapacity = 1 << 16; using AlignedNodePoolPtr = std::unique_ptr>; std::vector g_sgcache[2]; std::mutex g_sgcacheMtx; template constexpr size_t kCacheIndex = size_t{std::is_same::value}; } // namespace template typename SubgraphT::PoolPtr SubgraphT::getAllocator() { AlignedNodePoolPtr ptr; auto& cache = g_sgcache[kCacheIndex]; { std::lock_guard lk(g_sgcacheMtx); if (cache.empty()) { void* alloc = detail::alignedMalloc(sizeof(NoLockPoolAllocator), alignof(NoLockPoolAllocator)); auto* pool = new (alloc) NoLockPoolAllocator(sizeof(NodeType), 128 * sizeof(NodeType), ::malloc, ::free); ptr.reset(pool); } else { ptr = std::move(cache.back()); ptr->clear(); cache.pop_back(); } } return PoolPtr(ptr.release(), releaseAllocator); } template void SubgraphT::releaseAllocator(NoLockPoolAllocator* ptr) { if (!ptr) { return; } if (ptr->totalChunkCapacity() < kMaxChunkCapacity) { auto& cache = g_sgcache[kCacheIndex]; { std::lock_guard lk(g_sgcacheMtx); if (cache.size() < kMaxCache) { cache.emplace_back(ptr); return; } } } detail::AlignedFreeDeleter()(ptr); } template GraphT::GraphT(GraphT&& other) : subgraphs_(std::move(other.subgraphs_)) { for (SubgraphT& subgraph : subgraphs_) { subgraph.graph_ = this; } } template GraphT& GraphT::operator=(GraphT&& other) noexcept { subgraphs_ = std::move(other.subgraphs_); for (SubgraphT& subgraph : subgraphs_) { subgraph.graph_ = this; } return *this; } template SubgraphT& GraphT::addSubgraph() { subgraphs_.push_back(SubgraphType(this)); return subgraphs_.back(); } template class DISPENSO_DLL_ACCESS SubgraphT; template class DISPENSO_DLL_ACCESS SubgraphT; template class DISPENSO_DLL_ACCESS GraphT; template class DISPENSO_DLL_ACCESS GraphT; } // namespace dispenso ================================================ FILE: dispenso/graph.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file graph.h * @ingroup group_graph * A file providing task graph functionality for expressing complex task dependencies. **/ #pragma once #include #include #include #include #include #include #include #include #include #include #include /** * @def DISPENSO_GRAPH_DEPENDENTS_INLINE_SIZE * @brief Number of dependent node pointers to store inline in each Node. * * Most graph nodes have few dependents (1-4 is common). This value controls * how many dependent pointers are stored inline before heap allocation. * Larger values reduce heap allocations but increase Node size. * Define before including graph.h to customize. **/ #ifndef DISPENSO_GRAPH_DEPENDENTS_INLINE_SIZE #define DISPENSO_GRAPH_DEPENDENTS_INLINE_SIZE 4 #endif static_assert( DISPENSO_GRAPH_DEPENDENTS_INLINE_SIZE >= 1 && DISPENSO_GRAPH_DEPENDENTS_INLINE_SIZE <= 64, "DISPENSO_GRAPH_DEPENDENTS_INLINE_SIZE must be between 1 and 64"); /* Terminology -------------------------------------------------------------------------------- The Node depends on the predecessor. The dependent depends on the node. ~~~ ┌─────────────┐ ┌──────┐ ┌───────────┐ │ predecessor │ ──▶ │ node │ ──▶ │ dependent │ └─────────────┘ └──────┘ └───────────┘ ~~~ Graph construction -------------------------------------------------------------------------------- The Graph class can be used to created tasks with dependencies and execute it once. Graphs must not contain cycles. 
Example: ~~~ // // ┌────────────┐ ┌───────────────┐ ┌───────────────────┐ // │ 0: r[0]+=1 │ ──▶ │ 1: r[1]=r[0]*2│ ──▶ │ 4: r[4]=r[1]+r[3] │ // └────────────┘ └───────────────┘ └───────────────────┘ // ▲ // ┌────────────┐ ┌───────────────┐ │ // │ 2: r[2]+=8 │ ──▶ │ 3: r[3]=r[2]/2│ ──────┘ // └────────────┘ └───────────────┘ std::array r; dispenso::Graph graph; dispenso::Node& N0 = graph.addNode([&]() { r[0] += 1; }); dispenso::Node& N2 = graph.addNode([&]() { r[2] += 8; }); dispenso::Node& N1 = graph.addNode([&]() { r[1] += r[0] * 2; }); dispenso::Node& N3 = graph.addNode([&]() { r[3] += r[2] / 2; }); dispenso::Node& N4 = graph.addNode([&]() { r[4] += r[1] + r[3]; }); N4.dependsOn(N1, N3); N1.dependsOn(N0); N3.dependsOn(N2); dispenso::TaskSet taskSet(dispenso::globalThreadPool()); dispenso::ParallelForExecutor parallelForExecutor; parallelForExecutor(taskSet, graph); ~~~ Partial re-evaluation -------------------------------------------------------------------------------- If the graph is big, or we only need to recompute part of it, we can execute it again. After execution of the graph all nodes change their state from "incomplete" to "completed". In order to evaluate the whole graph again we can use the function `setAllNodesIncomplete`. Example: ~~~ r = {0, 0, 0, 0, 0}; setAllNodesIncomplete(graph); parallelForExecutor(taskSet, graph); ~~~ The graph can be recomputed partially if we have new input data for one or several nodes in the graph. In order to do this we call the `setIncomplete()` method on every node that needs to be recomputed, and then use the `ForwardPropagator` functor to mark all dependents as "incomplete". Example: ~~~ N1.setIncomplete(); r[1] = r[4] = 0; ForwardPropagator forwardPropagator; forwardPropagator(graph); parallelForExecutor(taskSet, graph); ~~~ In this example only nodes 1 and 4 will be invoked. 
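The same partial re-evaluation can also be run on the calling thread with `SingleThreadExecutor` (declared in `graph_executor.h`); this sketch is an illustrative addition, not part of the original example set:

~~~
N1.setIncomplete();
r[1] = r[4] = 0;
ForwardPropagator forwardPropagator;
forwardPropagator(graph);
dispenso::SingleThreadExecutor singleThreadExecutor;
singleThreadExecutor(graph);
~~~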
Subgraphs -------------------------------------------------------------------------------- It is possible to organize nodes into subgraphs, and to destroy and recreate them when the computation graph has static and dynamic parts. Example: ~~~ // // ∙----subgraph1---∙ ∙---subgraph2-------∙ // ¦ ┌────────────┐ ¦ ¦ ┌───────────────┐ ¦ ┌───────────────────┐ // ¦ │ 0: r[0]+=1 │ ──▶ │ 1: r[1]=r[0]*2│ ──▶ │ 4: r[4]=r[1]+r[3] │ // ¦ └────────────┘ ¦ ¦ └───────────────┘ ¦ └───────────────────┘ // ¦ ¦ ¦ ¦ ▲ // ¦ ┌────────────┐ ¦ ¦ ┌───────────────┐ ¦ │ // ¦ │ 2: r[2]+=8 │ ──▶ │ 3: r[3]=r[2]/2│ ──────┘ // ¦ └────────────┘ ¦ ¦ └───────────────┘ ¦ // ∙----------------∙ ∙-------------------∙ std::array r; dispenso::Graph graph; dispenso::Subgraph& subgraph1 = graph.addSubgraph(); dispenso::Subgraph& subgraph2 = graph.addSubgraph(); dispenso::Node& N0 = subgraph1.addNode([&]() { r[0] += 1; }); dispenso::Node& N2 = subgraph1.addNode([&]() { r[2] += 8; }); dispenso::Node& N1 = subgraph2.addNode([&]() { r[1] += r[0] * 2; }); dispenso::Node& N3 = subgraph2.addNode([&]() { r[3] += r[2] / 2; }); dispenso::Node& N4 = graph.addNode([&]() { r[4] += r[1] + r[3]; }); N4.dependsOn(N1, N3); N1.dependsOn(N0); N3.dependsOn(N2); // evaluate graph first time r = {0, 0, 0, 0, 0}; dispenso::ConcurrentTaskSet concurrentTaskSet(dispenso::globalThreadPool()); dispenso::ConcurrentTaskSetExecutor concurrentTaskSetExecutor; concurrentTaskSetExecutor(concurrentTaskSet, graph); // disconnect and destroy nodes of subgraph2 // it invalidates node references/pointers of this subgraph subgraph2.clear(); // create new nodes dispenso::Node& newN1 = subgraph2.addNode([&]() { r[1] += r[0] * 20; }); dispenso::Node& newN3 = subgraph2.addNode([&]() { r[3] += r[2] / 20; }); newN1.dependsOn(N0); newN3.dependsOn(N2); N4.dependsOn(newN1, newN3); // and re-evaluate the graph setAllNodesIncomplete(graph); concurrentTaskSetExecutor(concurrentTaskSet, graph); ~~~ Bidirectional propagation dependency -------------------------------------------------------------------------------- In certain scenarios, nodes may alter the same memory. In such instances, it becomes necessary to compute the predecessors of the node, even if they possess a "completed" state following state propagation. To facilitate this process automatically, we introduce the notion of a bidirectional propagation dependency (`BiProp`). 
Example: ~~~ // ┌─────────────────┐ // │ 3: m3+=b*b │ // └─────────────────┘ // ▲ // ┌−-----------−−−−−−−−−│−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−┐ // ╎ ┌───────────┐ ┌─────────────────┐ ┌────────────┐ ╎ // ╎ │ 0: b+=5 │ ──▷ │ 1: b*=5 │ ──▷ │ 2: b/=m4 │ ╎ // ╎ └───────────┘ └─────────────────┘ └────────────┘ ╎ // └−−−-−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−▲−−−−−−−−−−−−┘ // Legend: │ // ──▶ Normal dependency ┌────────────┐ // ──▷ Bidirectional propagation dependency │ 4: m4+=2 │ // m4 variable modified only in node 4 └────────────┘ float b, m3, m4 dispenso::BiPropGraph g; std::array N; dispenso::BiPropNode& N0 = g.addNode([&]() { b += 5; }); dispenso::BiPropNode& N1 = g.addNode([&]() { b *= 5; }); dispenso::BiPropNode& N2 = g.addNode([&]() { b /= m4; }); dispenso::BiPropNode& N3 = g.addNode([&]() { m3 += b*b; }); dispenso::BiPropNode& N4 = g.addNode([&]() { m4 += 2; }); N3.dependsOn(N1); N2.dependsOn(N4); N2.biPropDependsOn(N1); N1.biPropDependsOn(N0); // first execution b = m3 = m4 = 0.f; dispenso::ConcurrentTaskSet concurrentTaskSet(dispenso::globalThreadPool()); dispenso::ConcurrentTaskSetExecutor concurrentTaskSetExecutor; concurrentTaskSetExecutor(concurrentTaskSet, g); N[4].setIncomplete(); // if node 4 is incomplete after propagation node 2 become incomplete. Taking in account that node 2 // bidirectionally depends on nodes 0 and 1 they will be marked as incomplete as well b = m4 = 0.f; ForwardPropagator forwardPropagator; forwardPropagator(g); concurrentTaskSetExecutor(concurrentTaskSet, g); ~~~ Please read tests from `graph_test.cpp` for more examples. */ namespace detail { class ExecutorBase; template void callFunctor(void* ptr) { (*static_cast(ptr))(); } template void destroyFunctor(void* ptr) { static_cast(ptr)->~F(); constexpr size_t kFuncSize = static_cast(dispenso::detail::nextPow2(sizeof(F))); dispenso::deallocSmallBuffer(ptr); } } // namespace detail namespace dispenso { /** * Class to store task with dependencies **/ class Node { public: Node() = delete; Node(const Node&) = delete; Node& operator=(const Node&) = delete; /** Move constructor. */ Node(Node&& other) noexcept : numIncompletePredecessors_(other.numIncompletePredecessors_.load()), numPredecessors_(other.numPredecessors_), invoke_(other.invoke_), destroy_(other.destroy_), funcBuffer_(other.funcBuffer_), dependents_(std::move(other.dependents_)) { other.funcBuffer_ = nullptr; } ~Node() { if (funcBuffer_) { destroy_(funcBuffer_); } } /** * Make this node depends on nodes. This is not concurrency safe. * * @param nodes predecessors of the node **/ template inline void dependsOn(Ns&... nodes) { ((void)std::initializer_list{(dependsOnOneNode(nodes), 0)...}); } /** * Invoke the type-erased functor. Change competed state of the node to "Incomplete". * Concurrency safe. **/ inline void run() const { invoke_(funcBuffer_); numIncompletePredecessors_.store(kCompleted, std::memory_order_release); } /** * apply an func to each dependent of the node * * @param func a functor with signature void(const Node&) **/ template inline void forEachDependent(F&& func) const { for (const Node* dependent : dependents_) { func(*dependent); } } /** * apply an func to each dependent of the node This is not concurrency safe. * * @param func a functor with signature void(Node&) **/ template inline void forEachDependent(F&& func) { for (Node* dependent : dependents_) { func(*dependent); } } /** * Return the number of nodes this node depends on. Concurrency safe. 
**/ inline size_t numPredecessors() const { return numPredecessors_; } /** * Return true if node is completed. * New node always incomplete. If node was invoked it become completed. this * state can be changed by calling setIncomplete() * Concurrency safe. **/ inline bool isCompleted() const { return numIncompletePredecessors_.load(std::memory_order_relaxed) == kCompleted; } /** * Mark node incomplete. (that allows reevaluate this node again). * Concurrency safe. This methods should never be called concurrent to when * the graph execution is happening * * @return true if state was changed. **/ inline bool setIncomplete() const { if (numIncompletePredecessors_.load(std::memory_order_relaxed) == kCompleted) { numIncompletePredecessors_.store(0, std::memory_order_relaxed); return true; } return false; } /** * Mark node completed. * Concurrency safe. This methods should never be called concurrent to when * the graph execution is happening **/ inline void setCompleted() const { numIncompletePredecessors_.store(kCompleted, std::memory_order_relaxed); } protected: /** Construct a Node with a functor. @param f The functor to execute when the node runs. */ template ::value, void>> Node(F&& f) : numIncompletePredecessors_(0) { using FNoRef = typename std::remove_reference::type; constexpr size_t kFuncSize = static_cast(detail::nextPow2(sizeof(FNoRef))); funcBuffer_ = allocSmallBuffer(); new (funcBuffer_) FNoRef(std::forward(f)); invoke_ = ::detail::callFunctor; destroy_ = ::detail::destroyFunctor; } void dependsOnOneNode(Node& node) { node.dependents_.emplace_back(this); numPredecessors_++; } static constexpr size_t kCompleted = std::numeric_limits::max(); mutable std::atomic numIncompletePredecessors_; size_t numPredecessors_ = 0; private: using InvokerType = void (*)(void* ptr); InvokerType invoke_; InvokerType destroy_; char* funcBuffer_; SmallVector dependents_; // nodes depend on this template friend class SubgraphT; friend class ::detail::ExecutorBase; template friend void setAllNodesIncomplete(const G& graph); }; /** * Class to store task with dependencies. Support bidirectional propagation dependency between *nodes. **/ class BiPropNode : public Node { public: BiPropNode() = delete; BiPropNode(const BiPropNode&) = delete; BiPropNode& operator=(const BiPropNode&) = delete; /** Move constructor. */ BiPropNode(BiPropNode&& other) noexcept : Node(std::move(other)), biPropSet_(std::move(other.biPropSet_)) {} /** * Make this node depends on nodes. Create bidirectional propagation dependency. This is not *concurrency safe. * * @param nodes predecessors of the node **/ template inline void biPropDependsOn(Ns&... nodes) { ((void)std::initializer_list{(biPropDependsOnOneNode(nodes), 0)...}); } /** * Return true if node belongs to the same propogation set. (That means both nodes after * propogation become completed/incomplete together.) 
* * @param node to test **/ inline bool isSameSet(const BiPropNode& node) const { return biPropSet_ && biPropSet_ == node.biPropSet_; } private: template ::value, void>> BiPropNode(T&& f) : Node(std::forward(f)) {} inline void removeFromBiPropSet() { if (biPropSet_ != nullptr) { auto it = std::find(biPropSet_->begin(), biPropSet_->end(), this); if (it != biPropSet_->end()) { biPropSet_->erase(it); } } } DISPENSO_DLL_ACCESS void biPropDependsOnOneNode(BiPropNode& node); std::shared_ptr> biPropSet_; template friend class SubgraphT; friend class ::detail::ExecutorBase; }; template class GraphT; /** * A subgraph within a Graph, containing a collection of nodes that can be executed together. * * @tparam N The node type (Node or BiPropNode). */ template class DISPENSO_DLL_ACCESS SubgraphT { public: using NodeType = N; SubgraphT() = delete; SubgraphT(const SubgraphT&) = delete; SubgraphT& operator=(const SubgraphT&) = delete; /** Move constructor. */ SubgraphT(SubgraphT&& other) noexcept : graph_(other.graph_), nodes_(std::move(other.nodes_)), allocator_(std::move(other.allocator_)) {} ~SubgraphT(); /** * Construct a NodeType with a valid functor. This is not concurrency safe. * * @param f A functor with signature void(). * @return reference to the created node. **/ template DISPENSO_REQUIRES(OnceCallableFunc) N& addNode(T&& f) { nodes_.push_back(new (allocator_->alloc()) NodeType(std::forward(f))); return *nodes_.back(); } /** * Return number of nodes in subgraph. Concurrency safe. **/ size_t numNodes() const { return nodes_.size(); } /** * Reserve capacity for the specified number of nodes. * This can improve performance when the number of nodes is known in advance * by avoiding vector reallocations during addNode calls. * This is not concurrency safe. * * @param n - number of nodes to reserve capacity for **/ void reserve(size_t n) { nodes_.reserve(n); } /** * Return const reference to node with index. Concurrency safe. * * @param index - index of the node **/ const N& node(size_t index) const { return *nodes_[index]; } /** * Return reference to node with index. Concurrency safe. * * @param index - index of the node **/ N& node(size_t index) { return *nodes_[index]; } /** * apply an func to each node of the subgraph. Concurrency safe. * * @param func a functor with signature void(const Node&) **/ template inline void forEachNode(F&& func) const { for (const N* node : nodes_) { func(*node); } } /** * apply an func to each node of the subgraph. This is not concurrency safe. * This methods should never be called concurrent to when the graph execution is happening * * @param func a functor with signature void(Node&) **/ template inline void forEachNode(F&& func) { for (N* node : nodes_) { func(*node); } } /** * Removes all dependency between nodes of this subgraph and other nodes, destroy this subgraph * nodes. This is not concurrency safe. 
**/ void clear(); private: using DeallocFunc = void (*)(NoLockPoolAllocator*); using PoolPtr = std::unique_ptr; static constexpr size_t kNodeSizeP2 = static_cast(detail::nextPow2(sizeof(NodeType))); explicit SubgraphT(GraphT* graph) : graph_(graph), nodes_(), allocator_(getAllocator()) {} inline void removeNodeFromBiPropSet(Node* /* node */) {} void removeNodeFromBiPropSet(BiPropNode* node) { node->removeFromBiPropSet(); } void decrementDependentCounters(); size_t markNodesWithPredicessors(); void removePredecessorDependencies(size_t numGraphPredecessors); void destroyNodes(); static PoolPtr getAllocator(); static void releaseAllocator(NoLockPoolAllocator* ptr); GraphT* graph_; #if defined(_WIN32) && !defined(__MINGW32__) #pragma warning(push) #pragma warning(disable : 4251) #endif std::vector nodes_; PoolPtr allocator_; #if defined(_WIN32) && !defined(__MINGW32__) #pragma warning(pop) #endif template friend class GraphT; }; /** * A directed acyclic graph (DAG) for expressing task dependencies. * * GraphT manages a collection of subgraphs, each containing nodes that represent work to be done. * Nodes can have dependencies on other nodes, and the graph executor ensures nodes run only after * their dependencies complete. * * @tparam N The node type (Node or BiPropNode). */ template class DISPENSO_DLL_ACCESS GraphT { public: using NodeType = N; using SubgraphType = SubgraphT; GraphT(const GraphT&) = delete; GraphT& operator=(const GraphT&) = delete; /** * Create empty graph. **/ GraphT() { subgraphs_.push_back(SubgraphType(this)); } /** * Move constructor **/ GraphT(GraphT&& other); /** * Move assignment operator **/ GraphT& operator=(GraphT&& other) noexcept; /** * Construct a NodeType with a valid functor. This node is created into subgraph 0. * This is not concurrency safe. * * @param f A functor with signature void(). **/ template DISPENSO_REQUIRES(OnceCallableFunc) N& addNode(T&& f) { return subgraphs_[0].addNode(std::forward(f)); } /** * Return number of nodes in subgraph 0. Concurrency safe. **/ size_t numNodes() const { return subgraphs_[0].numNodes(); } /** * Return const reference to node with index in subgraph 0. Concurrency safe. * * @param index - index of the node **/ const N& node(size_t index) const { return subgraphs_[0].node(index); } /** * Return reference to node with index in subgraph 0. Concurrency safe. * * @param index - index of the node **/ N& node(size_t index) { return subgraphs_[0].node(index); } /** * Create an empty subgraph. This is not concurrency safe. **/ SubgraphT& addSubgraph(); /** * Return number of subgraphs in the graph including subgraph 0. Concurrency safe. **/ size_t numSubgraphs() const { return subgraphs_.size(); } /** * Return const reference to subgraph with index. Concurrency safe. * * @param index - index of the subgraph. **/ const SubgraphT& subgraph(size_t index) const { return subgraphs_[index]; } /** * Return reference to subgraph with index. Concurrency safe. * * @param index - index of the subgraph. **/ SubgraphT& subgraph(size_t index) { return subgraphs_[index]; } /** * apply an func to each subgraph in the graph. Concurrency safe. * * @param func a functor with signature void(const SubgraphT&) **/ template inline void forEachSubgraph(F&& func) const { for (const SubgraphT& subgraph : subgraphs_) { func(subgraph); } } /** * apply an func to each subgraph in the graph. Concurrency safe. 
* * @param func a functor with signature void(SubgraphT&) **/ template inline void forEachSubgraph(F&& func) { for (SubgraphT& subgraph : subgraphs_) { func(subgraph); } } /** * apply an func to each node in the graph including all nodes from all subgraphs. Concurrency * safe. * * @param func a functor with signature void(const Node&) **/ template inline void forEachNode(F&& func) const { for (const SubgraphT& subgraph : subgraphs_) { for (const N* node : subgraph.nodes_) { func(*node); } } } /** * apply an func to each node in the graph. Concurrency safe. * * @param func a functor with signature void(const Node&) **/ template inline void forEachNode(F&& func) { for (SubgraphT& subgraph : subgraphs_) { for (N* node : subgraph.nodes_) { func(*node); } } } /** * Destroy all nodes and subgraphs. This is not concurrency safe. **/ inline void clear() { subgraphs_.clear(); subgraphs_.push_back(SubgraphType(this)); } /** * Destroy all nodes. Keeps subgraphs. This is not concurrency safe. **/ inline void clearSubgraphs() { for (SubgraphT& subgraph : subgraphs_) { subgraph.destroyNodes(); } } private: static constexpr size_t kSubgraphSizeP2 = static_cast(detail::nextPow2(sizeof(SubgraphType))); #if defined(_WIN32) && !defined(__MINGW32__) #pragma warning(push) #pragma warning(disable : 4251) #endif std::deque> subgraphs_; #if defined(_WIN32) && !defined(__MINGW32__) #pragma warning(pop) #endif template friend class SubgraphT; }; /** A DAG for task scheduling with simple dependency tracking. */ using Graph = GraphT; /** A DAG for task scheduling with bidirectional dependency propagation. */ using BiPropGraph = GraphT; /** A subgraph within a Graph. */ using Subgraph = SubgraphT; /** A subgraph within a BiPropGraph. */ using BiPropSubgraph = SubgraphT; } // namespace dispenso ================================================ FILE: dispenso/graph_executor.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
*/ #include namespace dispenso { template void SingleThreadExecutor::operator()(const G& graph) { using NodeType = typename G::NodeType; nodesToExecute_.clear(); nodesToExecuteNext_.clear(); // Count total nodes to estimate capacity needs size_t nodeCount = 0; graph.forEachNode([&](const NodeType& node) { ++nodeCount; if (hasNoIncompletePredecessors(node)) { nodesToExecute_.emplace_back(&node); } }); // Reserve capacity to avoid reallocations during execution // Use a fraction of total nodes as estimate for max wave size size_t estimatedWaveSize = std::max(nodesToExecute_.size(), nodeCount / 4); nodesToExecute_.reserve(estimatedWaveSize); nodesToExecuteNext_.reserve(estimatedWaveSize); while (!nodesToExecute_.empty()) { for (const Node* n : nodesToExecute_) { const NodeType* node = static_cast(n); node->run(); node->forEachDependent([&](const Node& d) { if (decNumIncompletePredecessors( static_cast(d), std::memory_order_relaxed)) { nodesToExecuteNext_.emplace_back(static_cast(&d)); } }); } nodesToExecute_.swap(nodesToExecuteNext_); nodesToExecuteNext_.clear(); } } template void ParallelForExecutor::operator()(TaskSetT& taskSet, const G& graph) { using NodeType = typename G::NodeType; nodesToExecute_.clear(); nodesToExecuteNext_.clear(); // Count total nodes to estimate capacity needs size_t nodeCount = 0; graph.forEachNode([&](const NodeType& node) { ++nodeCount; if (hasNoIncompletePredecessors(node)) { nodesToExecute_.emplace_back(&node); } }); // Reserve capacity to avoid reallocations during execution size_t estimatedWaveSize = std::max(nodesToExecute_.size(), nodeCount / 4); nodesToExecute_.reserve(estimatedWaveSize); nodesToExecuteNext_.reserve(estimatedWaveSize); while (!nodesToExecute_.empty()) { // Use stateful parallel_for: each thread collects ready nodes locally dispenso::parallel_for( taskSet, threadStates_, []() { return ThreadLocalCollector{}; }, size_t(0), nodesToExecute_.size(), [this](ThreadLocalCollector& collector, size_t i) { const NodeType* node = static_cast(nodesToExecute_[i]); node->run(); node->forEachDependent([&](const Node& d) { // Relaxed ordering is safe here because the parallel_for wave barrier // provides a happens-before relationship, and nodesToExecute_.swap() // between waves ensures visibility of all side effects from prior waves. 
if (decNumIncompletePredecessors( static_cast(d), std::memory_order_relaxed)) { collector.readyNodes.push_back(static_cast(&d)); } }); }); // Merge thread-local collections into next wave for (auto& collector : threadStates_) { if (!collector.readyNodes.empty()) { nodesToExecuteNext_.insert( nodesToExecuteNext_.end(), collector.readyNodes.begin(), collector.readyNodes.end()); collector.readyNodes.clear(); } } nodesToExecute_.swap(nodesToExecuteNext_); nodesToExecuteNext_.clear(); } } template void ConcurrentTaskSetExecutor::operator()( dispenso::ConcurrentTaskSet& tasks, const G& graph, bool wait) { using NodeType = typename G::NodeType; startNodes_.clear(); graph.forEachNode([&](const NodeType& node) { if (hasNoIncompletePredecessors(node)) { startNodes_.emplace_back(&node); } }); for (const Node* n : startNodes_) { const NodeType* node = static_cast(n); tasks.schedule([&tasks, node]() { evaluateNodeConcurrently(tasks, node); }); } if (wait) { tasks.wait(); } } template void setAllNodesIncomplete(const G& graph) { using NodeType = typename G::NodeType; graph.forEachNode([&](const NodeType& node) { node.numIncompletePredecessors_.store(node.numPredecessors(), std::memory_order_relaxed); }); } template void ForwardPropagator::operator()(const G& graph) { using NodeType = typename G::NodeType; nodesToVisit_.clear(); nodesToVisitNext_.clear(); visited_.clear(); groups_.clear(); graph.forEachNode([&](const NodeType& node) { if (!node.isCompleted()) { nodesToVisit_.emplace_back(&node); visited_.insert(&node); appendGroup(static_cast(&node), groups_); } }); while (!nodesToVisit_.empty()) { for (const Node* node : nodesToVisit_) { node->forEachDependent([&](const Node& d) { addIncompletePredecessor(d); if (visited_.insert(static_cast(&d)).second) { nodesToVisitNext_.emplace_back(static_cast(&d)); appendGroup(static_cast(&d), groups_); } }); } nodesToVisit_.swap(nodesToVisitNext_); nodesToVisitNext_.clear(); } propagateIncompleteStateBidirectionally(); } template <> void ForwardPropagator::propagateIncompleteStateBidirectionally() {} template <> void ForwardPropagator::propagateIncompleteStateBidirectionally() { nodesToVisit_.clear(); for (const std::vector* group : groups_) { for (const dispenso::BiPropNode* gnode : *group) { if (gnode->setIncomplete()) { nodesToVisit_.emplace_back(gnode); } } } for (const Node* node : nodesToVisit_) { const BiPropNode* biPropNode = static_cast(node); biPropNode->forEachDependent([](const Node& d) { ifIncompleteAddIncompletePredecessor(d); }); } } template DISPENSO_DLL_ACCESS void SingleThreadExecutor::operator()(const Graph&); template DISPENSO_DLL_ACCESS void SingleThreadExecutor::operator()(const BiPropGraph&); template DISPENSO_DLL_ACCESS void ParallelForExecutor::operator()( TaskSet&, const Graph&); template DISPENSO_DLL_ACCESS void ParallelForExecutor::operator()( TaskSet&, const BiPropGraph&); template DISPENSO_DLL_ACCESS void ParallelForExecutor::operator()( ConcurrentTaskSet& tasks, const Graph& graph); template DISPENSO_DLL_ACCESS void ParallelForExecutor::operator()( ConcurrentTaskSet& tasks, const BiPropGraph& graph); template DISPENSO_DLL_ACCESS void ConcurrentTaskSetExecutor::operator()( dispenso::ConcurrentTaskSet& tasks, const Graph& graph, bool wait); template DISPENSO_DLL_ACCESS void ConcurrentTaskSetExecutor::operator()( dispenso::ConcurrentTaskSet& tasks, const BiPropGraph& graph, bool wait); template DISPENSO_DLL_ACCESS void setAllNodesIncomplete(const Graph&); template DISPENSO_DLL_ACCESS void setAllNodesIncomplete(const BiPropGraph&); 
template DISPENSO_DLL_ACCESS void ForwardPropagator::operator()(const Graph&); template DISPENSO_DLL_ACCESS void ForwardPropagator::operator()(const BiPropGraph&); } // namespace dispenso ================================================ FILE: dispenso/graph_executor.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file graph_executor.h * @ingroup group_graph * A file providing executors for running task graphs. **/ #pragma once #include #include #include #include #include namespace dispenso { /** * Class to invoke Graph or BiPropGraph on current thread. **/ class SingleThreadExecutor : public ::detail::ExecutorBase { public: /** * Invoke the graph. This is not concurrency safe. * * @param graph graph to invoke **/ template void operator()(const G& graph); private: std::vector nodesToExecute_; std::vector nodesToExecuteNext_; }; /** * Class to invoke Graph or BiPropGraph using * dispenso::parallel_for for every layer of the graph. **/ class ParallelForExecutor : public ::detail::ExecutorBase { public: /** * Invoke the graph. This is not concurrency safe. * * @param taskSet taksSet to use with parallel_for. * @param graph graph to invoke **/ template void operator()(TaskSetT& taskSet, const G& graph); private: // Per-thread state for collecting ready nodes locally during parallel_for struct ThreadLocalCollector { std::vector readyNodes; }; std::vector nodesToExecute_; std::vector nodesToExecuteNext_; std::vector threadStates_; }; /** * Class to invoke Graph or BiPropGraph using * dispenso::ConcurrentTaskSet **/ class ConcurrentTaskSetExecutor : public ::detail::ExecutorBase { public: /** * Invoke the graph. This is not concurrency safe. * * @param tasks ConcurrentTaskSet to schedule tasks. * @param graph graph to invoke * @param wait if true run tasks.wait() at the end of the function **/ template void operator()(dispenso::ConcurrentTaskSet& tasks, const G& graph, bool wait = true); private: std::vector startNodes_; }; /** * Class to propagate incomplete state recursively from nodes to dependents **/ class ForwardPropagator : public ::detail::ExecutorBase { public: /** * Propagate incomplete state recursively from nodes to dependents * This is not concurrency safe. **/ template void operator()(const G& graph); private: template void propagateIncompleteStateBidirectionally(); std::vector nodesToVisit_; std::vector nodesToVisitNext_; std::unordered_set visited_; std::unordered_set*> groups_; }; } // namespace dispenso ================================================ FILE: dispenso/latch.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file latch.h * @ingroup group_sync * A file providing a Latch barrier type, which gives a way for threads to wait until all expected * threads have reached this point. This is intended to match API and behavior of C++20 std::latch. **/ #pragma once #include #include namespace dispenso { /** * A class which can be used for barrier scenarios. See e.g. * https://en.cppreference.com/w/cpp/thread/latch **/ class Latch { public: /** * Construct a latch with expected number of threads to wait on. * * @param threadGroupCount The number of threads in the group. 
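 *
 * Example (a minimal sketch; it assumes the usual dispenso::globalThreadPool()/ThreadPool::schedule
 * entry points, and the worker body is a placeholder):
 * @code
 * dispenso::Latch latch(4);
 * for (int i = 0; i < 4; ++i) {
 *   dispenso::globalThreadPool().schedule([&latch]() {
 *     // ... perform per-worker initialization ...
 *     latch.count_down();
 *   });
 * }
 * latch.wait();  // Returns once all four workers have counted down.
 * @endcode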
**/ explicit Latch(uint32_t threadGroupCount) noexcept : impl_(threadGroupCount) {} /** * Decrement the counter in a non-blocking manner. **/ void count_down(uint32_t n = 1) noexcept { if (impl_.intrusiveStatus().fetch_sub(n, std::memory_order_acq_rel) == 1) { impl_.notify(0); } } /** * See if the count has been reduced to zero, indicating all necessary threads * have synchronized. * * @note try_wait is a misnomer, as the function never blocks. We kept the name to match C++20 * API. * @return true only if the internal counter has reached zero. **/ bool try_wait() const noexcept { return impl_.intrusiveStatus().load(std::memory_order_acquire) == 0; } /** * Wait for all threads to have synchronized. **/ void wait() const noexcept { impl_.wait(0); } /** * Decrement the counter and wait **/ void arrive_and_wait() noexcept { if (impl_.intrusiveStatus().fetch_sub(1, std::memory_order_acq_rel) > 1) { impl_.wait(0); } else { impl_.notify(0); } } private: detail::CompletionEventImpl impl_; }; } // namespace dispenso ================================================ FILE: dispenso/once_function.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file once_function.h * @ingroup group_core * A file providing OnceFunction, a class providing void() signature for closure to be called only * once. It is built to be cheap to create and move. **/ #pragma once #include #include #include namespace dispenso { #if DISPENSO_HAS_CONCEPTS /** * @concept OnceCallableFunc * @brief A callable suitable for wrapping in OnceFunction or scheduling as a task. * * The callable must be invocable with no arguments. The return value (if any) is discarded. * This is the fundamental requirement for functors passed to OnceFunction, * ThreadPool::schedule, TaskSet::schedule, and similar scheduling interfaces. **/ template concept OnceCallableFunc = std::invocable; #endif // DISPENSO_HAS_CONCEPTS namespace detail { template class FutureBase; template class FutureImplBase; } // namespace detail /** * A class fullfilling the void() signature, and operator() must be called exactly once for valid * OnceFunctions. This class can be much more efficient than std::function for type * erasing functors without too much state (currently < ~250 bytes). * @note The wrapped type-erased functor in OnceFunction is *not* deleted upon destruction, but * rather when operator() is called. It is the user's responsibility to ensure that operator() is * called. * **/ class OnceFunction { public: /** * Construct a OnceFunction with invalid state. **/ OnceFunction() #if defined DISPENSO_DEBUG : data_(nullptr), invoke_(nullptr) #endif // DISPENSO_DEBUG { } /** * Construct a OnceFunction with a valid functor. * * @param f A functor with signature void(). Ideally this should be a concrete functor (e.g. from * lambda), though it will work with e.g. std::function. The downside in the latter case is extra * overhead for double type erasure. **/ template DISPENSO_REQUIRES(OnceCallableFunc) OnceFunction(F&& f) { auto callable = detail::createOnceCallable(std::forward(f)); data_ = callable.data; invoke_ = callable.invoke; } OnceFunction(const OnceFunction& other) = delete; /** Move constructor. 
*/ OnceFunction(OnceFunction&& other) noexcept : data_(other.data_), invoke_(other.invoke_) { #if defined DISPENSO_DEBUG other.data_ = nullptr; other.invoke_ = nullptr; #endif // DISPENSO_DEBUG } OnceFunction& operator=(OnceFunction&& other) noexcept { data_ = other.data_; invoke_ = other.invoke_; #if defined DISPENSO_DEBUG if (&other != this) { other.data_ = nullptr; other.invoke_ = nullptr; } #endif // DISPENSO_DEBUG return *this; } /** * Destroy the type-erased functor and release its resources without invoking it. * Use this when a OnceFunction will not be called but its resources must still be freed. * Like operator(), this must be called at most once, and the OnceFunction must not be used after. **/ void cleanupNotRun() const { #if defined DISPENSO_DEBUG assert(data_ != nullptr && "Must not cleanup an invalid OnceFunction!"); invoke_(data_, false); data_ = nullptr; invoke_ = nullptr; #else invoke_(data_, false); #endif // DISPENSO_DEBUG } /** * Invoke the type-erased functor. This function must be called exactly once. Fewer will result * in a leak, while more will invoke on an invalid object. **/ void operator()() const { #if defined DISPENSO_DEBUG assert(data_ != nullptr && "Must not use OnceFunction more than once!"); #endif // DISPENSO_DEBUG invoke_(data_, true); #if defined DISPENSO_DEBUG data_ = nullptr; invoke_ = nullptr; #endif // DISPENSO_DEBUG } private: OnceFunction(detail::OnceCallable* func, bool) : data_(func), invoke_(&detail::runOnceCallable) {} mutable void* data_; void (*invoke_)(void*, bool); template friend class detail::FutureBase; template friend class detail::FutureImplBase; }; } // namespace dispenso ================================================ FILE: dispenso/parallel_for.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file parallel_for.h * @ingroup group_parallel * Functions for performing parallel for loops. **/ #pragma once #include #include #include #include #include #include namespace dispenso { #if DISPENSO_HAS_CONCEPTS /** * @concept ParallelForRangeFunc * @brief A callable suitable for chunked parallel_for with (begin, end) signature. * * The callable must be invocable with two integer arguments representing the chunk range. **/ template concept ParallelForRangeFunc = std::invocable; /** * @concept ParallelForIndexFunc * @brief A callable suitable for element-wise parallel_for with single index signature. * * The callable must be invocable with a single integer argument representing the element index. **/ template concept ParallelForIndexFunc = std::invocable; /** * @concept ParallelForStateRangeFunc * @brief A callable suitable for stateful chunked parallel_for. * * The callable must be invocable with (State&, begin, end) arguments. **/ template concept ParallelForStateRangeFunc = std::invocable; /** * @concept ParallelForStateIndexFunc * @brief A callable suitable for stateful element-wise parallel_for. * * The callable must be invocable with (State&, index) arguments. **/ template concept ParallelForStateIndexFunc = std::invocable; #endif // DISPENSO_HAS_CONCEPTS /** * Chunking strategy. Typically if the cost of each loop iteration is roughly constant, kStatic * load balancing is preferred. 
Additionally, when making a non-waiting parallel_for call in * conjunction with other parallel_for calls or with other task submissions to a TaskSet, some * dynamic load balancing is automatically introduced, and selecting kStatic load balancing here can * be better. If the workload per iteration deviates a lot from constant, and some ranges may be * much cheaper than others, select kAuto. **/ enum class ParForChunking { kStatic, kAuto }; /** * A set of options to control parallel_for **/ struct ParForOptions { /** * The maximum number of threads to use. This can be used to limit the number of threads below * the number associated with the TaskSet's thread pool to control the degree of concurrency. * Setting maxThreads to zero or one will result in serial operation. **/ uint32_t maxThreads = std::numeric_limits::max(); /** * Specify whether the return of the parallel_for signifies the work is complete. If the * parallel_for is initiated without providing a TaskSet, the parallel_for will always wait. * * @note If wait is true, the calling thread will always participate in computation. If this is * not desired, pass wait as false, and wait manually outside of the parallel_for on the passed * TaskSet. **/ bool wait = true; /** * Specify whether default chunking should be static or auto (dynamic load balancing). This is * used when invoking the version of parallel_for that takes index parameters (vs a ChunkedRange). **/ ParForChunking defaultChunking = ParForChunking::kStatic; /** * Specify a minimum number of items per chunk for static or auto dynamic load balancing. Cheaper * workloads should have a higher number of minWorkItems. Will be ignored if an explicit chunk * size is provided to ChunkedRange. **/ uint32_t minItemsPerChunk = 1; /** * When set to false, and StateContainers are supplied to parallel_for, re-create container from * scratch each call to parallel_for. When true, reuse existing state as much as possible (only * create new state if we require more than is already available in the container). **/ bool reuseExistingState = false; }; /** * A helper class for parallel_for. It provides various configuration parameters to * describe how to break up work for parallel processing. ChunkedRanges can be created with Auto * chunking, Static chunking, or specific chunking. Auto chunking makes large chunks for better * cache utilization, but tries to make enough chunks to provide some dynamic load balancing. Static * chunking makes N chunks given N threads to run the loop on. User-specified chunking can be * useful for ensuring e.g. that at least a multiple of SIMD width is provided per chunk. * parallel_for calls that don't accept a ChunkedRange will create a ChunkedRange * internally using Auto chunking. **/ template struct ChunkedRange { // We need to utilize 64-bit integers to avoid overflow, e.g. passing -2**30, 2**30 as int32 will // result in overflow unless we cast to 64-bit. Note that if we have a range of e.g. -2**63+1 to // 2**63-1, we cannot hold the result in an int64_t. We could in a uint64_t, but it is quite // tricky to make this work. However, I do not expect ranges larger than can be held in int64_t // since people want their computations to finish before the heat death of the sun (slight // exaggeration). using size_type = std::conditional_t::value, int64_t, uint64_t>; struct Static {}; struct Auto {}; static constexpr IntegerT kStatic = std::numeric_limits::max(); /** * Create a ChunkedRange with specific chunk size * * @param s The start of the range. 
* @param e The end of the range. * @param c The chunk size. **/ ChunkedRange(IntegerT s, IntegerT e, IntegerT c) : start(s), end(e), chunk(c) {} /** * Create a ChunkedRange with chunk size equal to total items divided by number of threads. * * @param s The start of the range. * @param e The end of the range. **/ ChunkedRange(IntegerT s, IntegerT e, Static) : ChunkedRange(s, e, kStatic) {} /** * Create a ChunkedRange with chunk size determined automatically to enable some dynamic load * balancing. * * @param s The start of the range. * @param e The end of the range. **/ ChunkedRange(IntegerT s, IntegerT e, Auto) : ChunkedRange(s, e, 0) {} bool isStatic() const { return chunk == kStatic; } bool isAuto() const { return chunk == 0; } bool empty() const { return end <= start; } size_type size() const { return static_cast(end) - start; } template std::tuple calcChunkSize(OtherInt numLaunched, bool oneOnCaller, size_type minChunkSize) const { size_type workingThreads = static_cast(numLaunched) + size_type{oneOnCaller}; assert(workingThreads > 0); if (!chunk) { size_type dynFactor = std::min(16, size() / workingThreads); size_type chunkSize; do { size_type roughChunks = dynFactor * workingThreads; chunkSize = (size() + roughChunks - 1) / roughChunks; --dynFactor; } while (chunkSize < minChunkSize); return {chunkSize, (size() + chunkSize - 1) / chunkSize}; } else if (chunk == kStatic) { // This should never be called. The static distribution versions of the parallel_for // functions should be invoked instead. std::abort(); } return {chunk, (size() + chunk - 1) / chunk}; } IntegerT start; IntegerT end; IntegerT chunk; }; /** * Create a ChunkedRange with specified chunking strategy. * * @param start The start of the range. * @param end The end of the range. * @param chunking The strategy to use for chunking. **/ template inline ChunkedRange> makeChunkedRange(IntegerA start, IntegerB end, ParForChunking chunking = ParForChunking::kStatic) { using IntegerT = std::common_type_t; return (chunking == ParForChunking::kStatic) ? ChunkedRange(start, end, typename ChunkedRange::Static()) : ChunkedRange(start, end, typename ChunkedRange::Auto()); } /** * Create a ChunkedRange with specific chunk size * * @param start The start of the range. * @param end The end of the range. * @param chunkSize The chunk size. 
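 *
 * Example (a minimal sketch; the loop body is a placeholder):
 * @code
 * // Process [0, 1 << 20) in chunks of 256 elements, e.g. a multiple of the SIMD width.
 * auto range = dispenso::makeChunkedRange(0, 1 << 20, 256);
 * dispenso::parallel_for(range, [](int begin, int end) {
 *   for (int i = begin; i < end; ++i) {
 *     // ... operate on element i ...
 *   }
 * });
 * @endcode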
**/ template inline ChunkedRange> makeChunkedRange(IntegerA start, IntegerB end, IntegerC chunkSize) { return ChunkedRange>(start, end, chunkSize); } namespace detail { struct NoOpIter { using difference_type = std::ptrdiff_t; using value_type = int; using pointer = int*; using reference = int&; using iterator_category = std::random_access_iterator_tag; int& operator*() const { static DISPENSO_THREAD_LOCAL int dummy = 0; return dummy; } NoOpIter& operator++() { return *this; } NoOpIter operator++(int) { return *this; } NoOpIter& operator--() { return *this; } NoOpIter operator--(int) { return *this; } NoOpIter& operator+=(difference_type) { return *this; } NoOpIter& operator-=(difference_type) { return *this; } NoOpIter operator+(difference_type) const { return *this; } NoOpIter operator-(difference_type) const { return *this; } difference_type operator-(const NoOpIter&) const { return 0; } bool operator==(const NoOpIter&) const { return true; } bool operator!=(const NoOpIter&) const { return false; } bool operator<(const NoOpIter&) const { return false; } int& operator[](difference_type) const { static DISPENSO_THREAD_LOCAL int dummy = 0; return dummy; } }; struct NoOpContainer { size_t size() const { return 0; } bool empty() const { return true; } void clear() {} NoOpIter begin() { return {}; } void emplace_back(int) {} int& front() { static int i; return i; } }; struct NoOpStateGen { int operator()() const { return 0; } }; /** * Initialize states container with enough entries for the given thread count. * Respects reuseExistingState: when true, only adds entries if the container * doesn't already have enough. */ template void initStates( StateContainer& states, const StateGen& defaultState, size_t numNeeded, bool reuseExistingState) { if (!reuseExistingState) { states.clear(); } for (size_t i = states.size(); i < numNeeded; ++i) { states.emplace_back(defaultState()); } } template < typename TaskSetT, typename IntegerT, typename F, typename StateContainer, typename StateGen> void parallel_for_staticImpl( TaskSetT& taskSet, StateContainer& states, const StateGen& defaultState, const ChunkedRange& range, F&& f, ssize_t maxThreads, bool wait, bool reuseExistingState) { using size_type = typename ChunkedRange::size_type; size_type numThreads = std::min(taskSet.numPoolThreads() + wait, maxThreads); // Reduce threads used if they exceed work to be done. numThreads = std::min(numThreads, range.size()); detail::initStates(states, defaultState, static_cast(numThreads), reuseExistingState); auto chunking = detail::staticChunkSize(static_cast(range.size()), static_cast(numThreads)); IntegerT chunkSize = static_cast(chunking.ceilChunkSize); bool perfectlyChunked = static_cast(chunking.transitionTaskIndex) == numThreads; // Number of tasks to schedule (all but the last one if wait is true) size_type numToSchedule = wait ? numThreads - 1 : numThreads; if (numToSchedule > 0) { // Precompute range boundaries for the generator // First loop: indices [0, transitionTaskIndex) use ceilChunkSize // Second loop: indices [transitionTaskIndex, numToSchedule) use ceilChunkSize - 1 size_type transitionIdx = perfectlyChunked ? 
numToSchedule : chunking.transitionTaskIndex; IntegerT smallChunkSize = static_cast(chunkSize - !perfectlyChunked); taskSet.scheduleBulk( static_cast(numToSchedule), [&, chunkSize, smallChunkSize, transitionIdx](size_t idx) { // Calculate start position for this chunk IntegerT start; IntegerT thisChunkSize; if (static_cast(idx) < transitionIdx) { IntegerT i = static_cast(idx); start = static_cast(range.start + static_cast(i * chunkSize)); thisChunkSize = chunkSize; } else { // After transition, chunks are smaller by 1 IntegerT ti = static_cast(transitionIdx); IntegerT ri = static_cast(idx - transitionIdx); start = static_cast( range.start + static_cast(ti * chunkSize) + static_cast(ri * smallChunkSize)); thisChunkSize = smallChunkSize; } IntegerT end = static_cast(start + thisChunkSize); auto stateIt = states.begin(); std::advance(stateIt, static_cast(idx)); return [it = stateIt, start, end, f]() { auto recurseInfo = detail::PerPoolPerThreadInfo::parForRecurse(); f(*it, start, end); }; }); } if (wait) { // Execute the last chunk on the calling thread auto stateIt = states.begin(); std::advance(stateIt, static_cast(numThreads - 1)); // Calculate start of last chunk size_type transitionIdx = perfectlyChunked ? numThreads - 1 : chunking.transitionTaskIndex; IntegerT smallChunkSize = static_cast(chunkSize - !perfectlyChunked); IntegerT lastStart; if (numThreads - 1 < transitionIdx) { IntegerT i = static_cast(numThreads - 1); lastStart = static_cast(range.start + static_cast(i * chunkSize)); } else { IntegerT ti = static_cast(transitionIdx); IntegerT ri = static_cast(numThreads - 1 - transitionIdx); lastStart = static_cast( range.start + static_cast(ti * chunkSize) + static_cast(ri * smallChunkSize)); } f(*stateIt, lastStart, range.end); taskSet.wait(); } } template struct ChunkSizingResult { typename ChunkedRange::size_type maxThreads; bool isStatic; }; /** * Adjust the thread count and static/dynamic scheduling decision based on work size. * * The goal is to avoid oversubscription and ensure each thread gets enough work: * * 1. Cap maxThreads to available pool threads (plus calling thread if waiting). * 2. If minItemsPerChunk > 1, reduce thread count so each thread gets at least * that many items. If even after reduction the chunks are too small, fall back * to static scheduling (which distributes items evenly without atomic contention). * 3. If minItemsPerChunk == 1 and the range is smaller than the thread count, * fall back to static scheduling (auto mode) or cap threads (explicit dynamic). 
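 *
 * Worked example (illustrative numbers only): with range.size() == 100, minItemsPerChunk == 16,
 * poolThreads == 8, wait == true, and maxThreads unlimited, step 1 caps maxThreads at 9;
 * step 2a computes maxWorkers = 100 / 16 = 6, reducing maxThreads to 6; step 2b then sees
 * 100 / (6 + 1) = 14 < 16, so an auto-chunked range falls back to static scheduling.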
*/ template ChunkSizingResult adjustChunkSizing( const ChunkedRange& range, typename ChunkedRange::size_type maxThreads, bool isStatic, uint32_t minItemsPerChunk, typename ChunkedRange::size_type poolThreads, bool wait) { using size_type = typename ChunkedRange::size_type; // Step 1: never use more threads than the pool actually has maxThreads = std::min(maxThreads, poolThreads + wait); if (minItemsPerChunk > 1) { // Step 2a: reduce threads so each gets at least minItemsPerChunk items size_type maxWorkers = range.size() / minItemsPerChunk; if (maxWorkers < maxThreads) { maxThreads = maxWorkers; } // Step 2b: if dynamic chunks would still be too small, use static scheduling if (maxThreads > 0 && range.size() / (maxThreads + wait) < minItemsPerChunk && range.isAuto()) { isStatic = true; } } else if (range.size() <= poolThreads + wait) { // Step 3: fewer items than threads — static is better (no atomic overhead) if (range.isAuto()) { isStatic = true; } else if (!range.isStatic()) { maxThreads = range.size() - wait; } } return {maxThreads, isStatic}; } /** * Dynamic (work-stealing) parallel_for implementation. * * Workers atomically claim chunks from a shared index and process them. * The ExitAction callback is invoked when a worker finds no more chunks; * this allows the no-wait path to deallocate a heap-allocated index when * the last worker exits, while the wait path passes a no-op. * * When wait is true, the calling thread participates as an additional worker * and blocks until all tasks complete. */ template < typename TaskSetT, typename IntegerT, typename F, typename StateContainer, typename IndexRef, typename ExitAction> void parallel_for_dynamicImpl( TaskSetT& taskSet, StateContainer& states, IntegerT start, IntegerT end, F&& f, size_t numToLaunch, typename ChunkedRange::size_type chunkSize, typename ChunkedRange::size_type numChunks, IndexRef& index, ExitAction exitAction, bool wait) { auto worker = [start, end, &index, f, chunkSize, numChunks, exitAction](auto& s) { auto recurseInfo = detail::PerPoolPerThreadInfo::parForRecurse(); while (true) { auto cur = index.fetch_add(1, std::memory_order_relaxed); if (cur >= numChunks) { exitAction(cur); break; } auto sidx = static_cast(start + cur * chunkSize); if (cur + 1 == numChunks) { f(s, sidx, end); } else { f(s, sidx, static_cast(sidx + chunkSize)); } } }; taskSet.scheduleBulk(static_cast(numToLaunch), [&states, &worker](size_t i) { auto it = states.begin(); std::advance(it, static_cast(i)); return [&s = *it, worker]() { worker(s); }; }); if (wait) { auto it = states.begin(); std::advance(it, static_cast(numToLaunch)); worker(*it); taskSet.wait(); } } } // namespace detail /** * Execute loop over the range in parallel. * * @param taskSet The task set to schedule the loop on. * @param states A container of State (actual type of State TBD by user). The * container will be resized to hold a State object per executing thread. Container * must provide emplace_back() and must be forward-iterable. Examples include std::vector, * std::deque, and std::list. These are the states passed into f, and states must * remain a valid object until work is completed. * @param defaultState A functor with signature State(). It will be called to initialize the * objects for states. * @param range The range defining the loop extents as well as chunking strategy. * @param f The functor to execute in parallel. Must have a signature like * void(State &s, size_t begin, size_t end). * @param options See ParForOptions for details. 
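 *
 * Example (a minimal sketch of a parallel reduction; `data` is a hypothetical std::vector<int>):
 * @code
 * dispenso::TaskSet taskSet(dispenso::globalThreadPool());
 * std::vector<int64_t> sums;
 * dispenso::parallel_for(
 *     taskSet,
 *     sums,
 *     []() { return int64_t{0}; },
 *     dispenso::makeChunkedRange(size_t{0}, data.size(), dispenso::ParForChunking::kAuto),
 *     [&data](int64_t& sum, size_t begin, size_t end) {
 *       for (size_t i = begin; i < end; ++i) {
 *         sum += data[i];
 *       }
 *     });
 * int64_t total = 0;
 * for (int64_t s : sums) {
 *   total += s;
 * }
 * @endcode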
**/ template < typename TaskSetT, typename IntegerT, typename F, typename StateContainer, typename StateGen> void parallel_for( TaskSetT& taskSet, StateContainer& states, const StateGen& defaultState, const ChunkedRange& range, F&& f, ParForOptions options = {}) { if (range.empty()) { if (options.wait) { taskSet.wait(); } return; } using size_type = typename ChunkedRange::size_type; uint32_t minItemsPerChunk = std::max(1, options.minItemsPerChunk); size_type maxThreads = std::max(options.maxThreads, 1); bool isStatic = range.isStatic(); const size_type N = taskSet.numPoolThreads(); if (N == 0 || !options.maxThreads || range.size() <= minItemsPerChunk || detail::PerPoolPerThreadInfo::isParForRecursive(&taskSet.pool())) { detail::initStates(states, defaultState, 1, options.reuseExistingState); f(*states.begin(), range.start, range.end); if (options.wait) { taskSet.wait(); } return; } auto chunkSizing = detail::adjustChunkSizing(range, maxThreads, isStatic, minItemsPerChunk, N, options.wait); maxThreads = chunkSizing.maxThreads; isStatic = chunkSizing.isStatic; // If adjustment reduced threads below 2, run inline — not worth parallelizing. if (maxThreads < 2) { detail::initStates(states, defaultState, 1, options.reuseExistingState); f(*states.begin(), range.start, range.end); if (options.wait) { taskSet.wait(); } return; } if (isStatic) { detail::parallel_for_staticImpl( taskSet, states, defaultState, range, std::forward(f), static_cast(maxThreads), options.wait, options.reuseExistingState); return; } const size_type numToLaunch = std::min(maxThreads - options.wait, N); detail::initStates( states, defaultState, static_cast(numToLaunch + options.wait), options.reuseExistingState); if (numToLaunch == 1 && !options.wait) { taskSet.schedule( [&s = states.front(), range, f = std::move(f)]() { f(s, range.start, range.end); }); return; } auto chunkInfo = range.calcChunkSize(numToLaunch, options.wait, minItemsPerChunk); auto chunkSize = std::get<0>(chunkInfo); auto numChunks = std::get<1>(chunkInfo); if (options.wait) { alignas(kCacheLineSize) std::atomic index(0); detail::parallel_for_dynamicImpl( taskSet, states, range.start, range.end, std::forward(f), static_cast(numToLaunch), chunkSize, numChunks, index, [](auto) {}, options.wait); } else { using SizeType = decltype(numChunks); struct ChunkIndex { std::atomic index; }; static_assert(sizeof(ChunkIndex) <= kCacheLineSize, "ChunkIndex must fit in one cache line"); char* mem = allocSmallBuffer(); auto* ci = new (mem) ChunkIndex{{0}}; SizeType lastExit = numChunks + static_cast(numToLaunch) - 1; detail::parallel_for_dynamicImpl( taskSet, states, range.start, range.end, std::forward(f), static_cast(numToLaunch), chunkSize, numChunks, ci->index, [ci, lastExit](auto cur) { if (cur == lastExit) { deallocSmallBuffer(ci); } }, options.wait); } } /** * Execute loop over the range in parallel. * * @param taskSet The task set to schedule the loop on. * @param range The range defining the loop extents as well as chunking strategy. * @param f The functor to execute in parallel. Must have a signature like * void(size_t begin, size_t end). * @param options See ParForOptions for details. 
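 *
 * Example (a minimal sketch of a non-waiting invocation; `results` is a hypothetical preallocated
 * std::vector<float> and produceValue is a placeholder):
 * @code
 * dispenso::TaskSet taskSet(dispenso::globalThreadPool());
 * auto range =
 *     dispenso::makeChunkedRange(size_t{0}, results.size(), dispenso::ParForChunking::kStatic);
 * dispenso::ParForOptions options;
 * options.wait = false;  // Schedule the loop without blocking the calling thread.
 * dispenso::parallel_for(
 *     taskSet,
 *     range,
 *     [&results](size_t begin, size_t end) {
 *       for (size_t i = begin; i < end; ++i) {
 *         results[i] = produceValue(i);
 *       }
 *     },
 *     options);
 * // ... do other work on this thread ...
 * taskSet.wait();  // Explicitly wait on the TaskSet once the results are needed.
 * @endcode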
**/ template DISPENSO_REQUIRES(ParallelForRangeFunc) void parallel_for( TaskSetT& taskSet, const ChunkedRange& range, F&& f, ParForOptions options = {}) { detail::NoOpContainer container; parallel_for( taskSet, container, detail::NoOpStateGen(), range, [f = std::move(f)](int /*noop*/, auto i, auto j) { f(i, j); }, options); } /** * Execute loop over the range in parallel on the global thread pool, and wait until complete. * * @param range The range defining the loop extents as well as chunking strategy. * @param f The functor to execute in parallel. Must have a signature like * void(size_t begin, size_t end). * @param options See ParForOptions for details. options.wait will always be reset *to true. **/ template DISPENSO_REQUIRES(ParallelForRangeFunc) void parallel_for(const ChunkedRange& range, F&& f, ParForOptions options = {}) { TaskSet taskSet(globalThreadPool()); options.wait = true; parallel_for(taskSet, range, std::forward(f), options); } /** * Execute loop over the range in parallel on the global thread pool and block until loop *completion. * * @param states A container of State (actual type of State TBD by user). The * container will be resized to hold a State object per executing thread. Container * must provide emplace_back() and must be forward-iterable. Examples include std::vector, * std::deque, and std::list. These are the states passed into f, and states must * remain a valid object until work is completed. * @param defaultState A functor with signature State(). It will be called to initialize the * objects for states. * @param range The range defining the loop extents as well as chunking strategy. * @param f The functor to execute in parallel. Must have a signature like * void(State &s, size_t begin, size_t end). * @param options See ParForOptions for details. options.wait will always be reset *to true. **/ template void parallel_for( StateContainer& states, const StateGen& defaultState, const ChunkedRange& range, F&& f, ParForOptions options = {}) { TaskSet taskSet(globalThreadPool()); options.wait = true; parallel_for(taskSet, states, defaultState, range, std::forward(f), options); } /** * Execute loop over the range in parallel. * * @param taskSet The task set to schedule the loop on. * @param start The start of the loop extents. * @param end The end of the loop extents. * @param f The functor to execute in parallel. Must have a signature like * void(size_t index) or void(size_t begin, size_t end). * @param options See ParForOptions for details. 
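 *
 * Example (a minimal sketch; `in` and `out` are hypothetical arrays of length n, with n a size_t):
 * @code
 * dispenso::TaskSet taskSet(dispenso::globalThreadPool());
 * dispenso::parallel_for(taskSet, size_t{0}, n, [&](size_t i) { out[i] = 2 * in[i]; });
 * @endcode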
**/ template < typename TaskSetT, typename IntegerA, typename IntegerB, typename F, std::enable_if_t::value, bool> = true, std::enable_if_t::value, bool> = true, std::enable_if_t::value, bool> = true> void parallel_for( TaskSetT& taskSet, IntegerA start, IntegerB end, F&& f, ParForOptions options = {}) { using IntegerT = std::common_type_t; auto range = makeChunkedRange(start, end, options.defaultChunking); parallel_for( taskSet, range, [f = std::move(f)](IntegerT s, IntegerT e) { for (IntegerT i = s; i < e; ++i) { f(i); } }, options); } /** @overload */ template < typename TaskSetT, typename IntegerA, typename IntegerB, typename F, std::enable_if_t::value, bool> = true, std::enable_if_t::value, bool> = true, std::enable_if_t::value, bool> = true> void parallel_for( TaskSetT& taskSet, IntegerA start, IntegerB end, F&& f, ParForOptions options = {}) { auto range = makeChunkedRange(start, end, options.defaultChunking); parallel_for(taskSet, range, std::forward(f), options); } /** * Execute loop over the range in parallel on the global thread pool and block on loop completion. * * @param start The start of the loop extents. * @param end The end of the loop extents. * @param f The functor to execute in parallel. Must have a signature like * void(size_t index) or void(size_t begin, size_t end). * @param options See ParForOptions for details. options.wait will always be reset *to true. **/ template < typename IntegerA, typename IntegerB, typename F, std::enable_if_t::value, bool> = true, std::enable_if_t::value, bool> = true> void parallel_for(IntegerA start, IntegerB end, F&& f, ParForOptions options = {}) { TaskSet taskSet(globalThreadPool()); options.wait = true; parallel_for(taskSet, start, end, std::forward(f), options); } /** * Execute loop over the range in parallel. * * @param taskSet The task set to schedule the loop on. * @param states A container of State (actual type of State TBD by user). The * container will be resized to hold a State object per executing thread. Container * must provide emplace_back() and must be forward-iterable. Examples include std::vector, * std::deque, and std::list. These are the states passed into f, and states must * remain a valid object until work is completed. * @param defaultState A functor with signature State(). It will be called to initialize the * objects for states. * @param start The start of the loop extents. * @param end The end of the loop extents. * @param f The functor to execute in parallel. Must have a signature like * void(State &s, size_t index) or * void(State &s, size_t begin, size_t end). * @param options See ParForOptions for details. 
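 *
 * Example (a minimal sketch of a per-thread counter; `values` is a hypothetical std::vector<int>):
 * @code
 * dispenso::TaskSet taskSet(dispenso::globalThreadPool());
 * std::vector<size_t> counts;
 * dispenso::parallel_for(
 *     taskSet,
 *     counts,
 *     []() { return size_t{0}; },
 *     size_t{0},
 *     values.size(),
 *     [&values](size_t& count, size_t i) { count += values[i] > 0; });
 * size_t numPositive = 0;
 * for (size_t c : counts) {
 *   numPositive += c;
 * }
 * @endcode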
**/ template < typename TaskSetT, typename IntegerA, typename IntegerB, typename F, typename StateContainer, typename StateGen, std::enable_if_t::value, bool> = true, std::enable_if_t::value, bool> = true, std::enable_if_t< detail::CanInvoke::value, bool> = true> void parallel_for( TaskSetT& taskSet, StateContainer& states, const StateGen& defaultState, IntegerA start, IntegerB end, F&& f, ParForOptions options = {}) { using IntegerT = std::common_type_t; auto range = makeChunkedRange(start, end, options.defaultChunking); parallel_for( taskSet, states, defaultState, range, [f = std::move(f)](auto& state, IntegerT s, IntegerT e) { for (IntegerT i = s; i < e; ++i) { f(state, i); } }, options); } /** @overload */ template < typename TaskSetT, typename IntegerA, typename IntegerB, typename F, typename StateContainer, typename StateGen, std::enable_if_t::value, bool> = true, std::enable_if_t::value, bool> = true, std::enable_if_t< detail::CanInvoke::value, bool> = true> void parallel_for( TaskSetT& taskSet, StateContainer& states, const StateGen& defaultState, IntegerA start, IntegerB end, F&& f, ParForOptions options = {}) { auto range = makeChunkedRange(start, end, options.defaultChunking); parallel_for(taskSet, states, defaultState, range, std::forward(f), options); } /** * Execute loop over the range in parallel on the global thread pool and block until loop *completion. * * @param states A container of State (actual type of State TBD by user). The * container will be resized to hold a State object per executing thread. Container * must provide emplace_back() and must be forward-iterable. Examples include std::vector, * std::deque, and std::list. These are the states passed into f, and states must * remain a valid object until work is completed. * @param defaultState A functor with signature State(). It will be called to initialize the * objects for states. * @param start The start of the loop extents. * @param end The end of the loop extents. * @param f The functor to execute in parallel. Must have a signature like * void(State &s, size_t index) or * void(State &s, size_t begin, size_t end). * @param options See ParForOptions for details. options.wait will always be reset *to true. **/ template < typename IntegerA, typename IntegerB, typename F, typename StateContainer, typename StateGen, std::enable_if_t::value, bool> = true, std::enable_if_t::value, bool> = true> void parallel_for( StateContainer& states, const StateGen& defaultState, IntegerA start, IntegerB end, F&& f, ParForOptions options = {}) { TaskSet taskSet(globalThreadPool()); options.wait = true; parallel_for(taskSet, states, defaultState, start, end, std::forward(f), options); } } // namespace dispenso ================================================ FILE: dispenso/pipeline.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file pipeline.h * @ingroup group_graph * A file providing utilities for parallel pipelining of work. **/ #pragma once #include #include namespace dispenso { /** * OpResult is like a poor-man's std::optional for those who wish to use dispenso pipeline filtering * in C++14. In C++17 and beyond, it is recommended to use std::optional instead. 
OpResult has * implicit construct from T, just like std::optional, and move/copy constructors and operators, * bool conversion, and value() function, but otherwise provides less functionality than * std::optional. **/ template using OpResult = detail::OpResult; /** * A simple constant representing maximum parallelism for a stage. This number has no particular * significance, and is simply here for convenience. **/ constexpr ssize_t kStageNoLimit = std::numeric_limits::max(); /** * Create a stage for use in the pipeline function. * * @param f A function-like object that can accept the result of the previous stage (if any), and * which produces the output for the next stage (if any). * @param limit How many threads may concurrently run work for this stage. Values larger than the * number of threads in the associated thread pool of the used ConcurrentTaskSet will be capped to * the size of the pool. * @return A stage object suitable for pipelining. **/ template auto stage(F&& f, ssize_t limit) { return detail::Stage(std::forward(f), limit); } /** * Pipeline work in stages. Pipelines allow stages to specify parallelism limits by using the * stage function, or a function-like object can simply be passed directly, indicating * a serial stage. Even if stages are serial, there can be parallelism between stages, so in a 3 * stage serial pipeline, the expected runtime is the max of the 3 stages runtimes (note that this * is in the absence of pipeline overheads and with an infinitely long workstream. In practice * speedup is somewhat less). This function will block until the entire pipeline has completed. * * @param pool The ThreadPool to run the work in. This inherently determines the upper bound for * parallelism of the pipeline. * @param sIn The stages to run. The first stage must be a Generator stage, the last must be a Sink * stage, and intermediate stages are Transform stages. * - If there is only one stage, it takes no * arguments, but returns a bool indicating completion (false means the pipeline is complete). * - Otherwise, the Generator stage takes no arguments and must return an OpResult or std::optional * value, and an invalid/nullopt result indicates that the Generator is done (no more values * forthcoming). * - Transform stages should accept the output of the prior stage (or output.value() in the case of * OpResult or std::optional), and should return either a value or an OpResult or std::optional * value if the Transform is capable of filtering results. Invalid/nullopt OpResult or std::optional * values indicate that the value should be filtered, and not passed on to the next stage. * - The Sink stage should accept the output of the prior stage, just as a Transform stage does, but * does not return any value (or at least the pipeline will ignore it). * * @note Exception behavior: If a stage function throws an exception, the pipeline will stop * producing new work (the generator checks for exceptions between iterations) and allow any * already-in-flight work to complete. Queued work that has not yet started will be discarded * without execution. The first captured exception is rethrown from pipeline() when the * pipeline winds down. Subsequent exceptions from concurrently in-flight stages are silently * discarded. All internal state (concurrency counters, resource slots, completion events) is * cleaned up via RAII, so the thread pool remains in a valid state after an exception and may be * reused for subsequent pipelines. 
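 *
 * Example (a minimal sketch of a three-stage pipeline that generates 100 indices, squares them
 * with up to four concurrent workers, and accumulates results serially; the stage bodies are
 * placeholders):
 * @code
 * int next = 0;
 * int64_t sum = 0;
 * dispenso::pipeline(
 *     dispenso::globalThreadPool(),
 *     [&next]() -> std::optional<int> {
 *       if (next == 100) {
 *         return std::nullopt;  // Generator is done.
 *       }
 *       return next++;
 *     },
 *     dispenso::stage([](int v) { return int64_t{v} * v; }, 4),
 *     [&sum](int64_t v) { sum += v; });
 * @endcode
 *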
When exceptions are disabled at compile time * (__cpp_exceptions not defined), all exception-related checks are eliminated with * zero overhead. **/ template void pipeline(ThreadPool& pool, Stages&&... sIn) { ConcurrentTaskSet tasks(pool); auto pipes = detail::makePipes(tasks, std::forward(sIn)...); pipes.execute(); pipes.wait(); } /** * Pipeline work in stages. Pipelines allow stages to specify parallelism limits by using the * stage function, or a function-like object can simply be passed directly, indicating * a serial stage. Even if stages are serial, there can be parallelism between stages, so in a 3 * stage serial pipeline, the expected runtime is the max of the 3 stages runtimes (note that this * is in the absence of pipeline overheads and with an infinitely long workstream. In practice * speedup is somewhat less). Work will be run on dispenso's global thread pool. This function will * block until the entire pipeline has completed. * * @param sIn The stages to run. The first stage must be a Generator stage, the last must be a Sink * stage, and intermediate stages are Transform stages. * - If there is only one stage, it takes no * arguments, but returns a bool indicating completion (false means the pipeline is complete). * - Otherwise, the Generator stage takes no arguments and must return an OpResult or std::optional * value, and an invalid/nullopt result indicates that the Generator is done (no more values * forthcoming). * - Transform stages should accept the output of the prior stage (or output.value() in the case of * OpResult or std::optional), and should return either a value or an OpResult or std::optional * value if the Transform is capable of filtering results. Invalid/nullopt OpResult or std::optional * values indicate that the value should be filtered, and not passed on to the next stage. * - The Sink stage should accept the output of the prior stage, just as a Transform stage does, but * does not return any value (or at least the pipeline will ignore it). * * @note See the ThreadPool overload of pipeline() for exception behavior details. **/ template void pipeline(Stages&&... sIn) { pipeline(globalThreadPool(), std::forward(sIn)...); } } // namespace dispenso ================================================ FILE: dispenso/platform.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file platform.h * @ingroup group_util * Platform constants and common utilities. **/ #pragma once #include #include #include #include #include #include #include #if defined(_MSC_VER) && \ (defined(_M_AMD64) || defined(_M_IX86) || defined(_M_ARM64) || defined(_M_ARM)) #include #endif namespace dispenso { #define DISPENSO_MAJOR_VERSION 1 #define DISPENSO_MINOR_VERSION 5 #define DISPENSO_PATCH_VERSION 1 // C++20 concepts support detection #if __cplusplus >= 202002L && defined(__cpp_concepts) && __cpp_concepts >= 201907L #define DISPENSO_HAS_CONCEPTS 1 #include #else #define DISPENSO_HAS_CONCEPTS 0 #endif /** * @def DISPENSO_REQUIRES * @brief Macro for conditionally applying C++20 concept constraints. * * On C++20 with concepts support, this expands to a requires clause. * On C++14/17, this expands to nothing, maintaining backward compatibility. 
* * Example usage: * @code * template * DISPENSO_REQUIRES(std::invocable) * void schedule(F&& f); * @endcode **/ #if DISPENSO_HAS_CONCEPTS #define DISPENSO_REQUIRES(...) requires(__VA_ARGS__) #else #define DISPENSO_REQUIRES(...) #endif #if defined(DISPENSO_SHARED_LIB) #if defined _WIN32 #if defined(DISPENSO_LIB_EXPORT) #define DISPENSO_DLL_ACCESS __declspec(dllexport) #else #define DISPENSO_DLL_ACCESS __declspec(dllimport) #endif // DISPENSO_LIB_EXPORT #elif defined(__clang__) || defined(__GNUC__) #define DISPENSO_DLL_ACCESS __attribute__((visibility("default"))) #endif // PLATFORM #endif // DISPENSO_SHARED_LIB #if !defined(DISPENSO_DLL_ACCESS) #define DISPENSO_DLL_ACCESS #endif // DISPENSO_DLL_ACCESS using ssize_t = std::make_signed::type; #if defined(__clang__) || defined(__GNUC__) #define DISPENSO_INLINE __attribute__((always_inline)) inline #elif defined(_MSC_VER) || defined(__INTEL_COMPILER) #define DISPENSO_INLINE __forceinline #else #define DISPENSO_INLINE inline #endif // PLATFORM /** * @var constexpr size_t kCacheLineSize * @brief A constant that defines a safe number of bytes+alignment to avoid false sharing. **/ #if defined(__APPLE__) && defined(__arm64__) constexpr size_t kCacheLineSize = 128; #else constexpr size_t kCacheLineSize = 64; #endif /** * @def DISPENSO_THREAD_LOCAL * @brief A macro that can be used when declaring a lightweight thread-local variable. **/ // TODO(bbudge): Non-gcc/clang/msvc platforms. #if defined(_MSC_VER) #define DISPENSO_THREAD_LOCAL __declspec(thread) #elif defined(__GNUC__) || defined(__clang__) #define DISPENSO_THREAD_LOCAL __thread #else #error Supply lightweight thread-locals for this compiler. Can define to thread_local if lightweight not available #endif #if (defined(__GNUC__) || defined(__clang__)) #define DISPENSO_EXPECT(a, b) __builtin_expect(a, b) #else #define DISPENSO_EXPECT(a, b) a #endif // clang-format off #if (defined(__GNUC__) || defined(__clang__)) #define DO_PRAGMA(X) _Pragma(#X) #define DISPENSO_DISABLE_WARNING_PUSH DO_PRAGMA(GCC diagnostic push) #define DISPENSO_DISABLE_WARNING_POP DO_PRAGMA(GCC diagnostic pop) #define DISPENSO_DISABLE_WARNING(warningName) DO_PRAGMA(GCC diagnostic ignored #warningName) #if !defined(__clang__) #define DISPENSO_DISABLE_WARNING_ZERO_VARIADIC_MACRO_ARGUMENTS #define DISPENSO_DISABLE_WARNING_GLOBAL_CONSTRUCTORS #else #define DISPENSO_DISABLE_WARNING_ZERO_VARIADIC_MACRO_ARGUMENTS \ DISPENSO_DISABLE_WARNING(-Wgnu-zero-variadic-macro-arguments) #define DISPENSO_DISABLE_WARNING_GLOBAL_CONSTRUCTORS \ DISPENSO_DISABLE_WARNING(-Wglobal-constructors) #endif #elif defined(_MSC_VER) #define DISPENSO_DISABLE_WARNING_PUSH __pragma(warning(push)) #define DISPENSO_DISABLE_WARNING_POP __pragma(warning(pop)) #define DISPENSO_DISABLE_WARNING(warningNumber) __pragma(warning(disable : warningNumber)) #define DISPENSO_DISABLE_WARNING_ZERO_VARIADIC_MACRO_ARGUMENTS #define DISPENSO_DISABLE_WARNING_GLOBAL_CONSTRUCTORS #else #define DISPENSO_DISABLE_WARNING_PUSH #define DISPENSO_DISABLE_WARNING_POP #define DISPENSO_DISABLE_WARNING_ZERO_VARIADIC_MACRO_ARGUMENTS #define DISPENSO_DISABLE_WARNING_GLOBAL_CONSTRUCTORS #endif // clang-format on /** * A wrapper that aligns the contained value to cache line boundaries. * * Useful for avoiding false sharing in concurrent data structures. * * @tparam T The type to wrap with cache line alignment. */ template class CacheAligned { public: CacheAligned() = default; /** Construct from a value. @param t The value to wrap. 
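 *
 * Example (a minimal sketch; the per-worker indexing is a placeholder):
 * @code
 * // Give each worker its own counter on its own cache line to avoid false sharing.
 * std::array<dispenso::CacheAligned<uint64_t>, 8> counters{};
 * uint64_t& mine = counters[workerIndex];  // Binds through operator T&.
 * ++mine;
 * @endcode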
*/ CacheAligned(T t) : t_(t) {} operator T&() { return t_; } operator const T&() const { return t_; } private: alignas(kCacheLineSize) T t_; }; namespace detail { template struct AlignedBuffer { alignas(alignof(T)) char b[sizeof(T)]; }; template struct alignas(kCacheLineSize) AlignedAtomic : public std::atomic {}; inline void* alignedMalloc(size_t bytes, size_t alignment) { alignment = std::max(alignment, sizeof(uintptr_t)); char* ptr = reinterpret_cast(::malloc(bytes + alignment)); uintptr_t base = reinterpret_cast(ptr); uintptr_t oldBase = base; uintptr_t mask = alignment - 1; base += alignment; base &= ~mask; uintptr_t* recovery = reinterpret_cast(base - sizeof(uintptr_t)); *recovery = oldBase; return reinterpret_cast(base); } inline void* alignedMalloc(size_t bytes) { return alignedMalloc(bytes, kCacheLineSize); } inline void alignedFree(void* ptr) { if (!ptr) { return; } char* p = reinterpret_cast(ptr); uintptr_t recovered = *reinterpret_cast(p - sizeof(uintptr_t)); ::free(reinterpret_cast(recovered)); } template struct AlignedFreeDeleter { void operator()(T* ptr) { ptr->~T(); detail::alignedFree(ptr); } }; template <> struct AlignedFreeDeleter { void operator()(void* ptr) { detail::alignedFree(ptr); } }; template std::shared_ptr make_shared(Args&&... args) { void* tv = alignedMalloc(sizeof(T), alignof(T)); T* t = new (tv) T(std::forward(args)...); return std::shared_ptr(t, AlignedFreeDeleter()); } inline constexpr uintptr_t alignToCacheLine(uintptr_t val) { constexpr uintptr_t kMask = kCacheLineSize - 1; val += kMask; val &= ~kMask; return val; } #if defined __x86_64__ || defined __i386__ inline void cpuRelax() { asm volatile("pause" ::: "memory"); } #elif defined _MSC_VER && (defined _M_AMD64 || defined _M_IX86) inline void cpuRelax() { _mm_pause(); } #elif defined __arm64__ || defined __aarch64__ inline void cpuRelax() { asm volatile("yield" ::: "memory"); } #elif defined _MSC_VER && (defined _M_ARM64 || defined _M_ARM) inline void cpuRelax() { __yield(); } #elif defined __powerpc__ || defined __POWERPC__ #if defined __APPLE__ inline void cpuRelax() { asm volatile("or r27,r27,r27" ::: "memory"); } #else inline void cpuRelax() { asm volatile("or 27,27,27" ::: "memory"); } #endif // APPLE #else // TODO: provide reasonable relax on other archs. inline void cpuRelax() {} #endif // ARCH // When statically chunking a range, it is generally not possible to use a single chunk size plus // remainder and get a good load distribution. By estimating too high, we can have idle threads. By // estimating too low, the remainder can be several times as large as the chunk for other threads. // Instead, we compute the chunk size that is the ceil of the fractional chunk size. That can be // used for the first transitionIndex values, while the remaining (chunks - transitionTaskIndex) // values will be ceilChunkSize - 1. struct StaticChunking { ssize_t transitionTaskIndex; ssize_t ceilChunkSize; }; inline StaticChunking staticChunkSize(ssize_t items, ssize_t chunks) { assert(chunks > 0); StaticChunking chunking; chunking.ceilChunkSize = (items + chunks - 1) / chunks; ssize_t numLeft = chunking.ceilChunkSize * chunks - items; chunking.transitionTaskIndex = chunks - numLeft; return chunking; } } // namespace detail } // namespace dispenso ================================================ FILE: dispenso/pool_allocator.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. 
* * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include namespace dispenso { template PoolAllocatorT::PoolAllocatorT( size_t chunkSize, size_t allocSize, std::function allocFunc, std::function deallocFunc) : chunkSize_(chunkSize), allocSize_(allocSize), chunksPerAlloc_(allocSize / chunkSize), allocFunc_(std::move(allocFunc)), deallocFunc_(std::move(deallocFunc)) { // Start off with at least enough space to store at least one set of chunks. chunks_.reserve(chunksPerAlloc_); } template char* PoolAllocatorT::alloc() { while (true) { uint32_t allocId = 0; if (kThreadSafe) { allocId = backingAllocLock_.fetch_or(1, std::memory_order_acquire); } if (allocId == 0) { if (chunks_.empty()) { char* buffer; if (backingAllocs2_.empty()) { buffer = reinterpret_cast(allocFunc_(allocSize_)); } else { buffer = backingAllocs2_.back(); backingAllocs2_.pop_back(); } backingAllocs_.push_back(buffer); // Push n-1 values into the chunks_ buffer, and then return the nth. for (size_t i = 0; i < chunksPerAlloc_ - 1; ++i) { chunks_.push_back(buffer); buffer += chunkSize_; } if (kThreadSafe) { backingAllocLock_.store(0, std::memory_order_release); } return buffer; } char* back = chunks_.back(); chunks_.pop_back(); if (kThreadSafe) { backingAllocLock_.store(0, std::memory_order_release); } return back; } else { std::this_thread::yield(); } } } template void PoolAllocatorT::dealloc(char* ptr) { // For now do not release any memory back to the deallocFunc until destruction. // TODO(bbudge): Consider cases where we haven't gotten below some threshold of ready chunks // in a while. In that case, we could begin tracking allocations, and try to assemble entire // starting allocations, possibly deferring a small amount to each alloc call. This would be // slower, but would ensure we don't get into a situation where we need a bunch of memory up // front, and then never again. while (true) { uint32_t allocId = 0; if (kThreadSafe) { allocId = backingAllocLock_.fetch_or(1, std::memory_order_acquire); } if (allocId == 0) { chunks_.push_back(ptr); if (kThreadSafe) { backingAllocLock_.store(0, std::memory_order_release); } break; } } } template void PoolAllocatorT::clear() { chunks_.clear(); if (backingAllocs2_.size() < backingAllocs_.size()) { std::swap(backingAllocs2_, backingAllocs_); } for (char* ba : backingAllocs_) { backingAllocs2_.push_back(ba); } backingAllocs_.clear(); } template PoolAllocatorT::~PoolAllocatorT() { for (char* backing : backingAllocs_) { deallocFunc_(backing); } for (char* backing : backingAllocs2_) { deallocFunc_(backing); } } template class PoolAllocatorT; template class PoolAllocatorT; } // namespace dispenso ================================================ FILE: dispenso/pool_allocator.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file pool_allocator.h * @ingroup group_alloc * A pool allocator to help reduce calls to the underlying allocation and deallocation functions * that can be provided custom backing allocation and deallocation functions, e.g. cudaMalloc, * cudaFree. **/ #pragma once #include #include #include #include namespace dispenso { /** * A pool allocator to help reduce calls to the underlying allocation and deallocation functions. **/ template class PoolAllocatorT { public: /** * Construct a PoolAllocator. 
* * @param chunkSize The chunk size for each pool allocation * @param allocSize The size of underlying slabs to be chunked * @param allocFunc The underlying allocation function for allocating slabs * @param deallocFunc The underlying deallocation function. Currently only called on destruction. **/ DISPENSO_DLL_ACCESS PoolAllocatorT( size_t chunkSize, size_t allocSize, std::function allocFunc, std::function deallocFunc); /** * Allocate a chunk from a slab * * @return The pointer to a buffer of chunkSize bytes **/ DISPENSO_DLL_ACCESS char* alloc(); /** * Deallocate a previously allocated chunk * * @param ptr The chunk to return to the available pool **/ DISPENSO_DLL_ACCESS void dealloc(char* ptr); /** * Effectively dealloc all previously allocated chunks. Useful for arenas. * This function is not thread safe, and no previously allocated chunks may be dealloc'd after * clear. **/ DISPENSO_DLL_ACCESS void clear(); /** * Get the total capicity allocated in chunks (how many alloc() could be called without triggering * allocFunc() if all chunks were available) **/ size_t totalChunkCapacity() const { return (backingAllocs2_.size() + backingAllocs_.size()) * chunksPerAlloc_; } /** * Destruct a PoolAllocator **/ DISPENSO_DLL_ACCESS ~PoolAllocatorT(); private: const size_t chunkSize_; const size_t allocSize_; const size_t chunksPerAlloc_; std::function allocFunc_; std::function deallocFunc_; // Use of a spin lock was found to be faster than std::mutex in benchmarks. alignas(kCacheLineSize) std::atomic backingAllocLock_{0}; std::vector backingAllocs_; std::vector backingAllocs2_; std::vector chunks_; }; /** Thread-safe pool allocator with internal locking. */ using PoolAllocator = PoolAllocatorT; /** Pool allocator without locking, for single-threaded use or external synchronization. */ using NoLockPoolAllocator = PoolAllocatorT; } // namespace dispenso ================================================ FILE: dispenso/priority.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
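// Hypothetical usage sketch (not part of the original source) for the
// PoolAllocator shown above. The std::function signatures are assumed to be
// void*(size_t) for allocation and void(void*) for deallocation, since
// template arguments were stripped in this extract; the sizes and ::malloc/::free
// backing functions are illustrative only (e.g. cudaMalloc/cudaFree could be used).
#include <dispenso/pool_allocator.h>
#include <cstdlib>

void poolAllocatorExample() {
  // Hand out 256-byte chunks carved from 16 KiB slabs obtained via ::malloc.
  dispenso::PoolAllocator pool(
      256,
      16 * 1024,
      [](size_t bytes) { return ::malloc(bytes); },
      [](void* p) { ::free(p); });

  char* chunk = pool.alloc(); // a backing slab is allocated lazily on first use
  // ... use the 256-byte chunk ...
  pool.dealloc(chunk); // returns the chunk to the pool; slabs are released at destruction
}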
*/ #include #if (defined(__unix__) || defined(unix)) && !defined(USG) #include #endif #if defined(__linux__) #include #include #include #elif defined(__MACH__) #include #include #include #elif defined(_WIN32) #include #elif defined(BSD) #include #include #endif namespace dispenso { namespace { DISPENSO_THREAD_LOCAL ThreadPriority g_threadPriority = ThreadPriority::kNormal; } // namespace ThreadPriority getCurrentThreadPriority() { return g_threadPriority; } #ifdef __MACH__ bool setCurrentThreadPriority(ThreadPriority prio) { mach_port_t threadport = pthread_mach_thread_np(pthread_self()); if (prio == ThreadPriority::kRealtime) { mach_timebase_info_data_t info; mach_timebase_info(&info); double msToAbsTime = ((double)info.denom / (double)info.numer) * 1000000.0; thread_time_constraint_policy_data_t time_constraints; time_constraints.period = 0; time_constraints.computation = static_cast(1.0 * msToAbsTime); time_constraints.constraint = static_cast(10.0 * msToAbsTime); time_constraints.preemptible = 0; if (thread_policy_set( threadport, THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t)&time_constraints, THREAD_TIME_CONSTRAINT_POLICY_COUNT) != KERN_SUCCESS) { return false; } } // https://fergofrog.com/code/cbowser/xnu/osfmk/kern/sched.h.html#_M/MAXPRI_USER struct thread_precedence_policy ttcpolicy; switch (prio) { case ThreadPriority::kLow: ttcpolicy.importance = 20; break; case ThreadPriority::kNormal: ttcpolicy.importance = 37; break; case ThreadPriority::kHigh: // fallthrough case ThreadPriority::kRealtime: ttcpolicy.importance = 63; break; } if (thread_policy_set( threadport, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&ttcpolicy, THREAD_PRECEDENCE_POLICY_COUNT) != KERN_SUCCESS) { return false; } g_threadPriority = prio; return true; } #elif defined(_WIN32) // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-setthreadpriority bool setCurrentThreadPriority(ThreadPriority prio) { if (prio == ThreadPriority::kRealtime) { if (!SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS)) { return false; } } if (prio == ThreadPriority::kHigh) { // Best effort SetPriorityClass(GetCurrentProcess(), HIGH_PRIORITY_CLASS); } bool success = false; switch (prio) { case ThreadPriority::kLow: success = SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_LOWEST); break; case ThreadPriority::kNormal: success = SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_NORMAL); break; case ThreadPriority::kHigh: // fallthrough case ThreadPriority::kRealtime: success = SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST); break; } if (!success) { return false; } g_threadPriority = prio; return true; } #elif defined(__linux__) bool setCurrentThreadPriority(ThreadPriority prio) { if (prio == ThreadPriority::kRealtime) { struct sched_param param; param.sched_priority = 99; if (pthread_setschedparam(pthread_self(), SCHED_FIFO, ¶m)) { return false; } } switch (prio) { case ThreadPriority::kLow: errno = 0; (void)!nice(10); break; case ThreadPriority::kNormal: errno = 0; (void)!nice(0); break; case ThreadPriority::kHigh: // fallthrough case ThreadPriority::kRealtime: { struct rlimit rlim; getrlimit(RLIMIT_NICE, &rlim); if (rlim.rlim_max <= 20) { return false; } rlim.rlim_cur = rlim.rlim_max; setrlimit(RLIMIT_NICE, &rlim); errno = 0; (void)!nice(static_cast(20 - rlim.rlim_max)); } } if (errno != 0) { return false; } g_threadPriority = prio; return true; } #elif defined(__FreeBSD__) // TODO: Find someone who has a FreeBSD system to test this code. 
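// Hypothetical usage sketch (not part of the original source): raising the
// priority of a dedicated worker thread via the setCurrentThreadPriority
// implementations above. The worker lambda and its contents are illustrative only.
#include <dispenso/priority.h>
#include <thread>

void runLatencySensitiveWorker() {
  std::thread worker([] {
    // Best effort: returns false if the platform refuses (e.g. insufficient privileges).
    if (!dispenso::setCurrentThreadPriority(dispenso::ThreadPriority::kHigh)) {
      // Proceed at the default priority.
    }
    // ... latency-sensitive work ...
  });
  worker.join();
}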
bool setCurrentThreadPriority(ThreadPriority prio) { struct rtprio rtp; if (prio == ThreadPriority::kRealtime) { rtp.type = RTP_PRIO_REALTIME; rtp.prio = 10; if (rtprio_thread(RTP_SET, 0, &rtp)) { return false; } } else { rtp.type = RTP_PRIO_NORMAL; switch (prio) { case ThreadPriority::kLow: rtp.prio = 31; break; case ThreadPriority::kNormal: rtp.prio = 15; break; case ThreadPriority::kHigh: // fallthrough case ThreadPriority::kRealtime: rtp.prio = 0; break; } if (rtprio_thread(RTP_SET, 0, &rtp)) { return false; } } g_threadPriority = prio; return true; } #else bool setCurrentThreadPriority(ThreadPriority prio) { return false; } #endif // platform } // namespace dispenso ================================================ FILE: dispenso/priority.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file priority.h * @ingroup group_util * * Utilities for getting and setting thread priority. This is an attempt to unify concepts for * thread priority usefully across multiple platforms. For finer control, use platform-specific * functionality. * * @note When using higher-than-normal priority, use caution! Too many threads running at too high * priority can have a strong negative impact on the responsiveness of the machine. Prefer to use * realtime priority only for short-running tasks that need to be very responsively run. **/ #pragma once #include namespace dispenso { /** * A thread priority setting. Enum values in increasing order of priority. **/ enum class ThreadPriority { kLow, kNormal, kHigh, kRealtime }; /** * Access the current thread priority as set by setCurrentThreadPriority. * * @return The priority of the current thread * * @note If the current thread priority has been set via a platform-specific mechanism, this may * return an incorrect value. **/ DISPENSO_DLL_ACCESS ThreadPriority getCurrentThreadPriority(); /** * Set the current thread's priority * * @param prio The priority to set to * * @return true if the priority was modified, false otherwise. **/ DISPENSO_DLL_ACCESS bool setCurrentThreadPriority(ThreadPriority prio); } // namespace dispenso ================================================ FILE: dispenso/resource_pool.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file resource_pool.h * @ingroup group_containers * A file providing ResourcePool. This is syntactic sugar over what is essentially a set of * semaphore-guarded resources. **/ #pragma once #include #include #include namespace dispenso { template class ResourcePool; /** * A RAII wrapper for a user's type that can manage accessibility and ensures the resource will go * back into the ResourcePool upon destruction. **/ template class Resource { public: /** Move constructor. */ Resource(Resource&& other) : resource_(other.resource_), pool_(other.pool_) { other.resource_ = nullptr; } Resource& operator=(Resource&& other) { if (&other != this) { recycle(); resource_ = other.resource_; pool_ = other.pool_; other.resource_ = nullptr; } return *this; } /** * Access the underlying resource object. * * @return a reference to the resource.
**/ T& get() { return *resource_; } ~Resource() { recycle(); } private: Resource(T* res, ResourcePool* pool) : resource_(res), pool_(pool) {} void recycle(); T* resource_; ResourcePool* pool_; friend class ResourcePool; }; /** * A pool of resources that can be accessed from multiple threads. This is akin to a set of * resources and a semaphore ensuring enough resources exist. **/ template class ResourcePool { public: /** * Construct a ResourcePool. * * @param size The number of T objects in the pool. * @param init A functor with signature T() which can be called to initialize the pool's * resources. **/ template ResourcePool(size_t size, const F& init) : pool_(size), backingResources_( reinterpret_cast( detail::alignedMalloc(size * detail::alignToCacheLine(sizeof(T))))), size_(size) { char* buf = backingResources_; // There are three reasons we create our own buffer and use placement new: // 1. We want to be able to handle non-movable non-copyable objects // * Note that we could do this with std::deque // 2. We want to minimize memory allocations, since that can be a common point of contention in // multithreaded programs. // 3. We can easily ensure that the objects are cache aligned to help avoid false sharing. for (size_t i = 0; i < size; ++i) { pool_.enqueue(new (buf) T(init())); buf += detail::alignToCacheLine(sizeof(T)); } } /** * Acquire a resource from the pool. This function may block until a resource becomes available. * * @return a Resource-wrapped resource. **/ Resource acquire() { T* t; DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); pool_.wait_dequeue(t); DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); return Resource(t, this); } /** * Destruct the ResourcePool. The user must ensure that all resources are returned to the pool * prior to destroying the pool. **/ ~ResourcePool() { assert(pool_.size_approx() == size_); for (size_t i = 0; i < size_; ++i) { T* t; DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); pool_.wait_dequeue(t); DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); t->~T(); } detail::alignedFree(backingResources_); } private: void recycle(T* t) { DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); pool_.enqueue(t); DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); } moodycamel::BlockingConcurrentQueue pool_; char* backingResources_; size_t size_; friend class Resource; }; template void Resource::recycle() { if (resource_) { pool_->recycle(resource_); } } } // namespace dispenso ================================================ FILE: dispenso/rw_lock.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file rw_lock.h * @ingroup group_sync * A file providing RWLock, a reader/writer lock interface. **/ #include namespace dispenso { /** * A reader/writer lock interface compatible with std::shared_mutex (for use with std::unique_lock * and std::shared_lock). The interface is designed to be very fast in the face of high levels of * contention for high read traffic and low write traffic. * * @note RWLock is not as fully-featured as std::shared_mutex: It does not go to the OS to wait. * This behavior is good for guarding very fast operations, but less good for guarding very slow * operations. Additionally, RWLock is not compatible with std::condition_variable, though * std::condition_variable_any may work (untested). 
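// Hypothetical usage sketch (not part of the original source) for the
// ResourcePool shown above. The ScratchBuffer type, pool size of 4, and the
// init lambda are illustrative only; the pool's template argument is assumed
// to be the resource type, since template parameters were stripped in this extract.
#include <dispenso/resource_pool.h>
#include <vector>

struct ScratchBuffer {
  std::vector<float> data = std::vector<float>(1 << 20);
};

void useScratchBuffer() {
  // Four buffers shared by however many threads call this function concurrently.
  static dispenso::ResourcePool<ScratchBuffer> pool(4, [] { return ScratchBuffer(); });

  auto res = pool.acquire(); // may block until one of the four buffers is free
  ScratchBuffer& buf = res.get();
  // ... use buf ...
} // res is destroyed here and the buffer is recycled back into the pool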
It could be possible to extend RWLock with it's * own ConditionVariable, make waiting operations sleep in the OS, and also to add timed functions; * however those may slow things down in the fast case. If some/all of that functionality is * needed, use std::shared_mutex, or develop a new type. **/ class alignas(kCacheLineSize) RWLock : public detail::RWLockImpl { public: /** * Locks for write access * * @note It is undefined behavior to recursively lock **/ using detail::RWLockImpl::lock; /** * Tries to lock for write access, returns if unable to lock * * @return true if lock was acquired, false otherwise **/ using detail::RWLockImpl::try_lock; /** * Unlocks write access * * @note Must already be locked by the current thread of execution, otherwise, the behavior is * undefined. **/ using detail::RWLockImpl::unlock; /** * Locks for read access * * @note It is undefined behavior to recursively lock **/ using detail::RWLockImpl::lock_shared; /** * Tries to lock for read access, returns if unable to lock * * @return true if lock was acquired, false otherwise * * @note It is undefined behavior to recursively lock **/ using detail::RWLockImpl::try_lock_shared; /** * Unlocks read access * * @note Must already be locked by the current thread of execution, otherwise, the behavior is * undefined. **/ using detail::RWLockImpl::unlock_shared; /** * Upgrade from a reader lock to a writer lock. lock_upgrade is a power-user interface. There is * a very good reason why it is not exposed as upgrade_mutex in the standard. To use it safely, * you *MUST* ensure only one thread can try to lock for write concurrently. If that cannot be * guaranteed, you should unlock for read, and lock for write instead of using lock_upgrade to * avoid potential deadlock. * * @note Calling this if the writer lock is already held, or if no reader lock is already held is * undefined behavior. **/ using detail::RWLockImpl::lock_upgrade; /** * Downgrade the lock from a writer lock to a reader lock. * * @note Calling this if the writer lock is not held results in undefined behavior **/ using detail::RWLockImpl::lock_downgrade; }; /** * An unaligned version of the RWLock. This could be useful if you e.g. want to create an array of * these to guard a large number of slots, and the likelihood of multiple threads touching any * region concurrently is low. All other behavior remains the same, so refer to the documentation * for RWLock. **/ class UnalignedRWLock : public detail::RWLockImpl {}; } // namespace dispenso ================================================ FILE: dispenso/schedulable.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file schedulable.h * @ingroup group_core * Classes providing simple schedulables that match scheduling interfaces of *TaskSet and ThreadPool * **/ #pragma once #include #include namespace dispenso { /** * A class fullfilling the Schedulable concept that immediately invokes the functor. This can be * used in place of ThreadPool or TaskSet with Futures at * construction or through then, or it may be used in TimedTask scheduling for * short-running tasks. **/ class ImmediateInvoker { public: /** * Schedule a functor to be executed. It will be invoked immediately. * * @param f The functor to be executed. f's signature must match void(). 
Best * performance will come from passing lambdas, other concrete functors, or OnceFunction, but * std::function or similarly type-erased objects will also work. **/ template DISPENSO_REQUIRES(OnceCallableFunc) void schedule(F&& f) const { f(); } /** * Schedule a functor to be executed. It is a bit oxymoronic to call this function, since * ForceQueuingTag will have no effect, and its use is discouraged. * **/ template DISPENSO_REQUIRES(OnceCallableFunc) void schedule(F&& f, ForceQueuingTag) const { f(); } }; constexpr ImmediateInvoker kImmediateInvoker; /** * A class fulfilling the Schedulable concept that always invokes on a new thread. This can be * used in place of ThreadPool or TaskSet with Futures at * construction or through then. **/ class NewThreadInvoker { public: /** * Schedule a functor to be executed on a new thread. * * @param f The functor to be executed. f's signature must match void(). Best * performance will come from passing lambdas, other concrete functors, or OnceFunction, but * std::function or similarly type-erased objects will also work. **/ template DISPENSO_REQUIRES(OnceCallableFunc) void schedule(F&& f) const { schedule(std::forward(f), ForceQueuingTag()); } /** * Schedule a functor to be executed on a new thread. * * @param f The functor to be executed. f's signature must match void(). Best * performance will come from passing lambdas, other concrete functors, or OnceFunction, but * std::function or similarly type-erased objects will also work. **/ template DISPENSO_REQUIRES(OnceCallableFunc) void schedule(F&& f, ForceQueuingTag) const { std::thread thread([f = std::move(f)]() { f(); }); thread.detach(); } private: }; constexpr NewThreadInvoker kNewThreadInvoker; } // namespace dispenso ================================================ FILE: dispenso/small_buffer_allocator.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree.
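// Hypothetical usage sketch (not part of the original source) for the
// schedulables shown above; the lambdas are illustrative only.
#include <dispenso/schedulable.h>

void schedulableExamples() {
  // Runs the lambda immediately, in the calling thread.
  dispenso::kImmediateInvoker.schedule([] { /* quick work */ });

  // Spawns (and detaches) a new thread that runs the lambda.
  dispenso::kNewThreadInvoker.schedule([] { /* independent work */ });
}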
*/ #include #include #include namespace dispenso { namespace detail { template SmallBufferGlobals& getSmallBufferGlobals() { // controlled leak here static SmallBufferGlobals* globals = new SmallBufferGlobals(); return *globals; } char* allocSmallBufferImpl(size_t ordinal) { switch (ordinal) { case 0: return detail::SmallBufferAllocator<4>::alloc(); case 1: return detail::SmallBufferAllocator<8>::alloc(); case 2: return detail::SmallBufferAllocator<16>::alloc(); case 3: return detail::SmallBufferAllocator<32>::alloc(); case 4: return detail::SmallBufferAllocator<64>::alloc(); case 5: return detail::SmallBufferAllocator<128>::alloc(); case 6: return detail::SmallBufferAllocator<256>::alloc(); default: assert(false && "Invalid small buffer ordinal requested"); return nullptr; } } void deallocSmallBufferImpl(size_t ordinal, void* buf) { switch (ordinal) { case 0: detail::SmallBufferAllocator<4>::dealloc(reinterpret_cast(buf)); break; case 1: detail::SmallBufferAllocator<8>::dealloc(reinterpret_cast(buf)); break; case 2: detail::SmallBufferAllocator<16>::dealloc(reinterpret_cast(buf)); break; case 3: detail::SmallBufferAllocator<32>::dealloc(reinterpret_cast(buf)); break; case 4: detail::SmallBufferAllocator<64>::dealloc(reinterpret_cast(buf)); break; case 5: detail::SmallBufferAllocator<128>::dealloc(reinterpret_cast(buf)); break; case 6: detail::SmallBufferAllocator<256>::dealloc(reinterpret_cast(buf)); break; default: assert(false && "Invalid small buffer ordinal requested"); } } size_t approxBytesAllocatedSmallBufferImpl(size_t ordinal) { switch (ordinal) { case 0: return detail::SmallBufferAllocator<4>::bytesAllocated(); case 1: return detail::SmallBufferAllocator<8>::bytesAllocated(); case 2: return detail::SmallBufferAllocator<16>::bytesAllocated(); case 3: return detail::SmallBufferAllocator<32>::bytesAllocated(); case 4: return detail::SmallBufferAllocator<64>::bytesAllocated(); case 5: return detail::SmallBufferAllocator<128>::bytesAllocated(); case 6: return detail::SmallBufferAllocator<256>::bytesAllocated(); default: assert(false && "Invalid small buffer ordinal requested"); return 0; } } template SmallBufferAllocator::PerThreadQueuingData::~PerThreadQueuingData() { enqueue_bulk(buffers_, count_); DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); ptoken().~ProducerToken(); ctoken().~ConsumerToken(); DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); } template class SmallBufferAllocator<4>; template class SmallBufferAllocator<8>; template class SmallBufferAllocator<16>; template class SmallBufferAllocator<32>; template class SmallBufferAllocator<64>; template class SmallBufferAllocator<128>; template class SmallBufferAllocator<256>; } // namespace detail } // namespace dispenso ================================================ FILE: dispenso/small_buffer_allocator.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file small_buffer_allocator.h * @ingroup group_alloc * A file providing SmallBufferAllocator. This allocator can allocate and deallocate chunks of a * set size in a way that is efficient and scales quite well across many threads. **/ #pragma once #include #include namespace dispenso { /** * Set a standard for the maximum chunk size for use within dispenso. The reason for this limit is * that there are diminishing returns after a certain size, and each new pool has it's own memory * overhead. 
**/ constexpr size_t kMaxSmallBufferSize = 256; namespace detail { DISPENSO_DLL_ACCESS char* allocSmallBufferImpl(size_t ordinal); DISPENSO_DLL_ACCESS void deallocSmallBufferImpl(size_t ordinal, void* buf); DISPENSO_DLL_ACCESS size_t approxBytesAllocatedSmallBufferImpl(size_t ordinal); // This has the effect of selecting actual block sizes starting with 4 bytes. Smaller requests // (e.g. 1 byte, 2 bytes) will still utilize 4-byte blocks. Choice of 4 bytes as the smallest // mainly aligns to sizeof(ptr) on 32-bit platforms, where we'd expect most common use cases to be // no smaller than one pointer. Retaining 4-byte buckets on 64-bit platforms doesn't cost much // (tiny startup/teardown cost, and trivial amount of memory) when not using 4-byte or smaller // allocations, and makes the code simpler. constexpr size_t getOrdinal(size_t blockSize) { return static_cast(std::max(0, static_cast(log2const(blockSize)) - 2)); } template inline std::enable_if_t<(kBlockSize <= kMaxSmallBufferSize), char*> allocSmallOrLarge() { #if defined(DISPENSO_NO_SMALL_BUFFER_ALLOCATOR) return reinterpret_cast(alignedMalloc(kBlockSize, kBlockSize)); #else return allocSmallBufferImpl(getOrdinal(kBlockSize)); #endif // DISPENSO_NO_SMALL_BUFFER_ALLOCATOR } template inline std::enable_if_t<(kBlockSize > kMaxSmallBufferSize), char*> allocSmallOrLarge() { return reinterpret_cast(alignedMalloc(kBlockSize, kBlockSize)); } template inline std::enable_if_t<(kBlockSize <= kMaxSmallBufferSize), void> deallocSmallOrLarge(void* buf) { #if defined(DISPENSO_NO_SMALL_BUFFER_ALLOCATOR) alignedFree(buf); #else deallocSmallBufferImpl(getOrdinal(kBlockSize), buf); #endif // DISPENSO_NO_SMALL_BUFFER_ALLOCATOR } template inline std::enable_if_t<(kBlockSize > kMaxSmallBufferSize), void> deallocSmallOrLarge(void* buf) { alignedFree(buf); } } // namespace detail /** * Allocate a small buffer from a small buffer pool. * * @tparam kBlockSize The size of the block to allocate. Must be a power of two, and must be less * than or equal to kMaxSmallBufferSize. * @return The pointer to the allocated block of memory. * @note: The returned buffer must be returned to the pool via deallocSmallBuffer templatized on the * same block size. If kBlockSize > kMaxSmallBufferSize, this function falls back on alignedMalloc. * If DISPENSO_NO_SMALL_BUFFER_ALLOCATOR is defined, we will always fall back on * alignedMalloc/alignedFree. **/ template inline char* allocSmallBuffer() { return detail::allocSmallOrLarge(); } /** * Free a small buffer from a small buffer pool. * * @tparam kBlockSize The size of the block to allocate. Must be a power of two, and must be less * than or equal to kMaxSmallBufferSize. * @param buf the pointer to block of memory to return to the pool. Must have been allocated with * allocSmallBuffer templatized on the same block size. * @note: If kBlockSize > kMaxSmallBufferSize, this function falls back on alignedFree. **/ template inline void deallocSmallBuffer(void* buf) { detail::deallocSmallOrLarge(buf); } /** * Get the approximate bytes allocated for a single small buffer pool (associated with *kBlockSize). This function is not highly performant and locks, and should only be used for *diagnostics (e.g. tests). * * @tparam kBlockSize The block size for the pool to query. 
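// Hypothetical usage sketch (not part of the original source) for the
// allocSmallBuffer/deallocSmallBuffer functions shown above; the 64-byte block
// size is illustrative only.
#include <dispenso/small_buffer_allocator.h>

void smallBufferExample() {
  // Block size must be a power of two; sizes above kMaxSmallBufferSize fall
  // back to alignedMalloc/alignedFree as documented above.
  char* buf = dispenso::allocSmallBuffer<64>();
  // ... use the 64-byte buffer ...
  dispenso::deallocSmallBuffer<64>(buf);
}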
**/ template size_t approxBytesAllocatedSmallBuffer() { return detail::approxBytesAllocatedSmallBufferImpl(detail::getOrdinal(kBlockSize)); } } // namespace dispenso ================================================ FILE: dispenso/small_vector.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file small_vector.h * @ingroup group_util * A vector-like container with inline storage for small sizes. * * SmallVector stores up to N elements inline (on the stack), falling back to * heap allocation when size exceeds N. Uses the high bit of the size field to * track storage mode, keeping the struct compact with no extra flags. * * Once transitioned to heap storage, the vector stays on heap until clear() * is called. This ensures reserve() guarantees are honored and avoids * unnecessary copies between storage modes. **/ #pragma once #include #include #include #include #include namespace dispenso { /** * A vector-like container that stores up to N elements inline (on the stack), * avoiding heap allocation for small sizes. Falls back to heap allocation when * the size exceeds N. * * @tparam T The element type. * @tparam N The number of elements to store inline (default 4). Must be in range [1, 65535]. * * Memory layout: Uses a union to share space between inline storage and heap * pointer/capacity. The high bit of the size field tracks whether heap storage * is active, so no additional bool or flag field is needed. * * Once on heap, the vector stays on heap until clear() is called. This ensures * reserve() capacity is preserved across push_back/pop_back sequences. */ template class SmallVector { static_assert(N > 0, "SmallVector requires at least 1 inline element. Use std::vector for N=0."); static_assert(N < 65536, "SmallVector inline capacity is too large. Use std::vector instead."); public: using value_type = T; using size_type = size_t; using difference_type = std::ptrdiff_t; using reference = T&; using const_reference = const T&; using pointer = T*; using const_pointer = const T*; using iterator = T*; using const_iterator = const T*; /** * Construct an empty SmallVector. **/ SmallVector() noexcept : size_(0) {} /** * Construct a SmallVector with @p count default-constructed elements. * * @param count The number of elements. **/ explicit SmallVector(size_type count) : size_(0) { resize(count); } /** * Construct a SmallVector with @p count copies of @p value. * * @param count The number of elements. * @param value The value to copy into each element. **/ SmallVector(size_type count, const T& value) : size_(0) { resize(count, value); } /** * Construct a SmallVector from an initializer list. * * @param init The initializer list of elements. **/ SmallVector(std::initializer_list init) : size_(0) { ensureCapacity(init.size()); for (const auto& v : init) { emplace_back(v); } } /** Copy constructor. **/ SmallVector(const SmallVector& other) : size_(0) { ensureCapacity(other.rawSize()); for (const auto& v : other) { emplace_back(v); } } /** Move constructor. Moves elements and leaves @p other empty. 
**/ SmallVector(SmallVector&& other) noexcept : size_(0) { if (other.isInline()) { for (size_type i = 0; i < other.rawSize(); ++i) { new (inlineData() + i) T(std::move(other.inlineData()[i])); other.inlineData()[i].~T(); } } else { storage_.heap_.ptr = other.storage_.heap_.ptr; storage_.heap_.capacity = other.storage_.heap_.capacity; size_ |= kHeapBit; } size_ = (size_ & kHeapBit) | other.rawSize(); other.size_ = 0; } ~SmallVector() { destroyAll(); } /** Copy assignment operator. **/ SmallVector& operator=(const SmallVector& other) { if (this != &other) { destroyAll(); size_ = 0; ensureCapacity(other.rawSize()); for (const auto& v : other) { emplace_back(v); } } return *this; } /** Move assignment operator. **/ SmallVector& operator=(SmallVector&& other) noexcept { if (this != &other) { destroyAll(); size_ = 0; if (other.isInline()) { for (size_type i = 0; i < other.rawSize(); ++i) { new (inlineData() + i) T(std::move(other.inlineData()[i])); other.inlineData()[i].~T(); } } else { storage_.heap_.ptr = other.storage_.heap_.ptr; storage_.heap_.capacity = other.storage_.heap_.capacity; size_ |= kHeapBit; } size_ = (size_ & kHeapBit) | other.rawSize(); other.size_ = 0; } return *this; } // --- Element Access --- /** Access element at @p pos (unchecked). **/ reference operator[](size_type pos) { return data()[pos]; } /** @copydoc operator[](size_type) **/ const_reference operator[](size_type pos) const { return data()[pos]; } /** Access the first element. **/ reference front() { return data()[0]; } /** @copydoc front() **/ const_reference front() const { return data()[0]; } /** Access the last element. **/ reference back() { return data()[rawSize() - 1]; } /** @copydoc back() **/ const_reference back() const { return data()[rawSize() - 1]; } /** Return a pointer to the underlying element storage. **/ pointer data() noexcept { return isInline() ? inlineData() : storage_.heap_.ptr; } /** @copydoc data() **/ const_pointer data() const noexcept { return isInline() ? inlineData() : storage_.heap_.ptr; } // --- Iterators --- /** Return an iterator to the first element. **/ iterator begin() noexcept { return data(); } /** @copydoc begin() **/ const_iterator begin() const noexcept { return data(); } /** @copydoc begin() **/ const_iterator cbegin() const noexcept { return data(); } /** Return an iterator past the last element. **/ iterator end() noexcept { return data() + rawSize(); } /** @copydoc end() **/ const_iterator end() const noexcept { return data() + rawSize(); } /** @copydoc end() **/ const_iterator cend() const noexcept { return data() + rawSize(); } // --- Capacity --- /** Check whether the vector is empty. **/ bool empty() const noexcept { return rawSize() == 0; } /** Return the number of elements. **/ size_type size() const noexcept { return rawSize(); } /** * Return the current capacity. * Returns N while using inline storage, or the heap capacity otherwise. **/ size_type capacity() const noexcept { return isInline() ? N : storage_.heap_.capacity; } /** * Reserve capacity for at least newCap elements. * If newCap > N, transitions to heap storage. * Once on heap, capacity is preserved until clear(). */ void reserve(size_type newCap) { ensureCapacity(newCap); } // --- Modifiers --- /** * Remove all elements and release heap storage (if any). * After clear(), the vector returns to inline storage mode. **/ void clear() noexcept { destroyAll(); size_ = 0; } /** Append a copy of @p value. **/ void push_back(const T& value) { emplace_back(value); } /** Append @p value by moving. 
**/ void push_back(T&& value) { emplace_back(std::move(value)); } /** * Construct an element in-place at the end. * * @param args Arguments forwarded to the element constructor. * @return A reference to the newly constructed element. **/ template reference emplace_back(Args&&... args) { T* ptr; if (isInline()) { size_type sz = rawSize(); if (sz < N) { ptr = inlineData(); } else { growToHeap(N * 2); ptr = storage_.heap_.ptr; } } else { size_type sz = rawSize(); if (sz == storage_.heap_.capacity) { growToHeap(storage_.heap_.capacity * 2); } ptr = storage_.heap_.ptr; } size_type idx = rawSize(); new (ptr + idx) T(std::forward(args)...); // Increment preserves heap bit naturally ++size_; assert(rawSize() > 0 && "Size overflow into heap bit"); return ptr[idx]; } /** Remove the last element. **/ void pop_back() { T* ptr = data(); size_type sz = rawSize(); ptr[sz - 1].~T(); // Decrement preserves heap bit naturally --size_; } /** * Resize the vector to @p count elements, default-constructing new elements if growing. * * @param count The desired number of elements. **/ void resize(size_type count) { size_type sz = rawSize(); if (count > sz) { ensureCapacity(count); T* ptr = data(); for (size_type i = sz; i < count; ++i) { new (ptr + i) T(); } setSize(count); } else if (count < sz) { T* ptr = data(); for (size_type i = count; i < sz; ++i) { ptr[i].~T(); } setSize(count); } } /** * Resize the vector to @p count elements, copy-constructing new elements from @p value if * growing. * * @param count The desired number of elements. * @param value The value to copy into new elements. **/ void resize(size_type count, const T& value) { size_type sz = rawSize(); if (count > sz) { ensureCapacity(count); T* ptr = data(); for (size_type i = sz; i < count; ++i) { new (ptr + i) T(value); } setSize(count); } else if (count < sz) { T* ptr = data(); for (size_type i = count; i < sz; ++i) { ptr[i].~T(); } setSize(count); } } /** * Erase the element at @p pos. * Elements after @p pos are shifted left. Returns an iterator to the element that now * occupies the erased position (or end() if the last element was erased). * * @param pos Iterator to the element to erase. * @return Iterator to the element following the erased one. **/ iterator erase(const_iterator pos) { T* ptr = data(); size_type sz = rawSize(); size_type index = pos - ptr; for (size_type i = index; i + 1 < sz; ++i) { ptr[i] = std::move(ptr[i + 1]); } ptr[sz - 1].~T(); --size_; // Preserves heap bit return data() + index; } private: // High bit of size_ tracks heap vs inline mode. // Since kHeapBit >> N, comparisons like size_ <= N and size_ < N // are naturally false when the heap bit is set, so isInline() and // many internal checks work without masking. static constexpr size_type kHeapBit = size_type(1) << (sizeof(size_type) * 8 - 1); static constexpr size_type kSizeMask = ~kHeapBit; bool isInline() const noexcept { return (size_ & kHeapBit) == 0; } size_type rawSize() const noexcept { return size_ & kSizeMask; } // Set the size portion, preserving the heap bit. 
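// Hypothetical usage sketch (not part of the original source) for the
// SmallVector shown above, assuming the template takes the element type and the
// inline element count (template parameters were stripped in this extract).
#include <dispenso/small_vector.h>

void smallVectorExample() {
  // Up to 4 ints live inline; the 5th push_back moves storage to the heap.
  dispenso::SmallVector<int, 4> v;
  for (int i = 0; i < 4; ++i) {
    v.push_back(i); // still inline: capacity() == 4, no heap allocation yet
  }
  v.push_back(4);      // transitions to heap storage; capacity() grows to at least 8
  v.erase(v.begin());  // shifts the remaining elements left by one
  v.clear();           // releases heap storage and returns to inline mode
}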
void setSize(size_type s) noexcept { assert((s & kHeapBit) == 0 && "Size overflow into heap bit"); size_ = (size_ & kHeapBit) | s; } T* inlineData() noexcept { return reinterpret_cast(&storage_.inline_); } const T* inlineData() const noexcept { return reinterpret_cast(&storage_.inline_); } void destroyAll() noexcept { T* ptr = data(); size_type sz = rawSize(); for (size_type i = 0; i < sz; ++i) { ptr[i].~T(); } if (!isInline()) { ::operator delete(storage_.heap_.ptr); } } // Grow to heap storage with the specified capacity. // Moves existing elements, frees old heap if applicable, sets heap bit. void growToHeap(size_type newCap) { T* newData = static_cast(::operator new(newCap * sizeof(T))); T* oldData = data(); size_type sz = rawSize(); for (size_type i = 0; i < sz; ++i) { new (newData + i) T(std::move(oldData[i])); oldData[i].~T(); } if (!isInline()) { ::operator delete(storage_.heap_.ptr); } storage_.heap_.ptr = newData; storage_.heap_.capacity = newCap; size_ = kHeapBit | sz; } void ensureCapacity(size_type newCap) { if (newCap <= N && isInline()) { return; } if (isInline()) { growToHeap(newCap); } else if (newCap > storage_.heap_.capacity) { growToHeap(newCap); } } size_type size_; struct HeapStorage { T* ptr; size_type capacity; }; union Storage { alignas(T) unsigned char inline_[sizeof(T) * N]; HeapStorage heap_; Storage() noexcept {} ~Storage() {} } storage_; }; } // namespace dispenso ================================================ FILE: dispenso/spsc_ring_buffer.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file spsc_ring_buffer.h * @ingroup group_util * A lock-free single-producer single-consumer (SPSC) ring buffer. * * This buffer is designed for high-performance communication between exactly one producer * thread and one consumer thread. It uses a fixed-capacity circular buffer with atomic * head and tail pointers, providing wait-free operations in the common case. * * @note This implementation is NOT thread-safe for multiple producers or multiple consumers. * Use only with exactly one producer thread and one consumer thread. **/ #pragma once #include #include #include #include #include #include #include namespace dispenso { // TODO(bbudge): Add kDynamicCapacity specialization that heap-allocates storage // with runtime-determined capacity. /** * @class SPSCRingBuffer * @brief A lock-free single-producer single-consumer ring buffer with fixed capacity. * * This class implements a bounded, lock-free ring buffer optimized for the case where * there is exactly one producer thread and exactly one consumer thread. It uses relaxed * memory ordering where possible and acquire/release semantics only where necessary * for correctness. * * The buffer stores elements in a contiguous array, avoiding dynamic memory allocation * after construction. The capacity is fixed at compile time via a template parameter. * * Unlike std::array-based implementations, this buffer does NOT require the element type * to be default-constructible. Elements are constructed in-place when pushed and destroyed * when popped. * * @tparam T The type of elements stored in the buffer. Must be move-constructible. * @tparam Capacity The minimum number of elements the buffer can hold. Must be at least 1. * Defaults to 16, which provides good cache locality. 
* @tparam RoundUpToPowerOfTwo If true (default), rounds up the internal buffer size to the * next power of two for faster index wrap-around using bitwise * AND instead of modulo. This may result in actual capacity being * larger than requested. Set to false to use exactly the requested * capacity. * * ## Thread Safety * * This class is designed for exactly one producer thread and one consumer thread: * - Only one thread may call `try_push()` or `try_emplace()` at any time * - Only one thread may call `try_pop()` at any time * - The producer and consumer may be different threads * - `empty()`, `full()`, and `size()` may be called from any thread, but provide * only a snapshot that may be immediately stale * * ## Memory Ordering * * The implementation uses: * - `memory_order_relaxed` for local reads of head/tail * - `memory_order_acquire` when reading the "other" index (consumer reads head, producer * reads tail) * - `memory_order_release` when updating head/tail after successful push/pop * * ## Performance Characteristics * * - Push: O(1), wait-free when buffer is not full * - Pop: O(1), wait-free when buffer is not empty * - Memory: sizeof(T) * (actual capacity + 1) + 2 cache lines for head/tail indices * - When RoundUpToPowerOfTwo is true (default), index wrap-around uses fast bitwise AND * * ## Example Usage * * @code * // Default: rounds up to power of two for performance * dispenso::SPSCRingBuffer buffer; // actual capacity is 127 (128 - 1) * * // Exact capacity mode (no rounding) * dispenso::SPSCRingBuffer exact; // actual capacity is 100 * * // Producer thread * if (buffer.try_push(42)) { * // Success * } * * // Consumer thread * int value; * if (buffer.try_pop(value)) { * // Use value * } * @endcode * * @see https://www.1024cores.net/home/lock-free-algorithms/queues/bounded-mpmc-queue * for background on lock-free queue algorithms */ template class SPSCRingBuffer { static_assert(Capacity >= 1, "SPSCRingBuffer capacity must be at least 1"); static_assert( std::is_move_constructible::value, "SPSCRingBuffer element type must be move-constructible"); public: /** * @brief The type of elements stored in this buffer. */ using value_type = T; /** * @brief The size type used for indices and counts. */ using size_type = size_t; /** * @brief Constructs an empty ring buffer. * * The buffer is initialized with no elements. The internal storage is * uninitialized - elements are only constructed when pushed. * * @note Construction is not thread-safe. Ensure the buffer is fully * constructed before any thread accesses it. */ SPSCRingBuffer() = default; /** * @brief Ring buffers are not copyable. * * Copying a concurrent data structure would require synchronization * and could lead to subtle bugs. Use move semantics if you need to * transfer ownership. */ SPSCRingBuffer(const SPSCRingBuffer&) = delete; /** * @brief Ring buffers are not copy-assignable. * @see SPSCRingBuffer(const SPSCRingBuffer&) */ SPSCRingBuffer& operator=(const SPSCRingBuffer&) = delete; /** * @brief Ring buffers are not movable. * * Moving a ring buffer while producer/consumer threads are active * would be unsafe. If you need to transfer ownership, ensure all * threads have stopped first. */ SPSCRingBuffer(SPSCRingBuffer&&) = delete; /** * @brief Ring buffers are not move-assignable. * @see SPSCRingBuffer(SPSCRingBuffer&&) */ SPSCRingBuffer& operator=(SPSCRingBuffer&&) = delete; /** * @brief Destroys the ring buffer. * * All elements remaining in the buffer are destroyed. 
Ensure no * producer or consumer threads are accessing the buffer when it * is destroyed. * * @note Destruction is not thread-safe. */ ~SPSCRingBuffer() { // Destroy any remaining elements size_t head = head_.load(std::memory_order_relaxed); size_t tail = tail_.load(std::memory_order_relaxed); while (head != tail) { elementAt(head)->~T(); head = increment(head); } } /** * @brief Attempts to push an element into the buffer by moving. * * If the buffer has space, the element is moved into the buffer and * the function returns true. If the buffer is full, the function * returns false and the element is unchanged. * * @param item The element to push (will be moved from on success). * @return true if the element was successfully pushed, false if the buffer was full. * * @note Only one thread may call this function at any time (single producer). * @note This operation is wait-free. * * ## Example * @code * SPSCRingBuffer buffer; * std::string msg = "hello"; * if (buffer.try_push(std::move(msg))) { * // msg is now empty (moved from) * } else { * // msg is unchanged, buffer was full * } * @endcode */ bool try_push(T&& item) { const size_t currentTail = tail_.load(std::memory_order_relaxed); const size_t nextTail = increment(currentTail); // Check if buffer is full if (nextTail == head_.load(std::memory_order_acquire)) { return false; } // Construct element in-place new (elementAt(currentTail)) T(std::move(item)); tail_.store(nextTail, std::memory_order_release); return true; } /** * @brief Attempts to push an element into the buffer by copying. * * If the buffer has space, the element is copied into the buffer and * the function returns true. If the buffer is full, the function * returns false. * * @param item The element to push (will be copied). * @return true if the element was successfully pushed, false if the buffer was full. * * @note Only one thread may call this function at any time (single producer). * @note This operation is wait-free. * @note Prefer try_push(T&&) when the source element is no longer needed. */ bool try_push(const T& item) { const size_t currentTail = tail_.load(std::memory_order_relaxed); const size_t nextTail = increment(currentTail); // Check if buffer is full if (nextTail == head_.load(std::memory_order_acquire)) { return false; } // Construct element in-place via copy new (elementAt(currentTail)) T(item); tail_.store(nextTail, std::memory_order_release); return true; } /** * @brief Attempts to construct an element in-place in the buffer. * * If the buffer has space, constructs an element directly in the buffer * storage using the provided arguments, avoiding any copy or move operations. * * @tparam Args The types of arguments to forward to T's constructor. * @param args The arguments to forward to the element constructor. * @return true if the element was successfully emplaced, false if the buffer was full. * * @note Only one thread may call this function at any time (single producer). * @note This operation is wait-free. * * ## Example * @code * SPSCRingBuffer, 4> buffer; * if (buffer.try_emplace(42, "hello")) { * // Element constructed in-place * } * @endcode */ template bool try_emplace(Args&&... 
args) { const size_t currentTail = tail_.load(std::memory_order_relaxed); const size_t nextTail = increment(currentTail); // Check if buffer is full if (nextTail == head_.load(std::memory_order_acquire)) { return false; } // Construct element in-place new (elementAt(currentTail)) T(std::forward(args)...); tail_.store(nextTail, std::memory_order_release); return true; } /** * @brief Attempts to pop an element from the buffer. * * If the buffer has elements, moves the front element into the output * parameter and returns true. If the buffer is empty, returns false * and leaves the output parameter unchanged. * * @param[out] item The location to move the popped element to. * @return true if an element was successfully popped, false if the buffer was empty. * * @note Only one thread may call this function at any time (single consumer). * @note This operation is wait-free. * * ## Example * @code * SPSCRingBuffer buffer; * buffer.try_push(42); * * int value; * if (buffer.try_pop(value)) { * assert(value == 42); * } * @endcode */ bool try_pop(T& item) { const size_t currentHead = head_.load(std::memory_order_relaxed); // Check if buffer is empty if (currentHead == tail_.load(std::memory_order_acquire)) { return false; } T* elem = elementAt(currentHead); item = std::move(*elem); elem->~T(); head_.store(increment(currentHead), std::memory_order_release); return true; } /** * @brief Attempts to pop an element from the buffer, returning an optional. * * If the buffer has elements, moves the front element into an OpResult * and returns it. If the buffer is empty, returns an empty OpResult. * * This provides a cleaner API than try_pop(T&) when default-constructibility * of T is not guaranteed or when a more functional style is preferred. * * @return An OpResult containing the popped element, or an empty OpResult * if the buffer was empty. * * @note Only one thread may call this function at any time (single consumer). * @note This operation is wait-free. * @note In C++17 and beyond, you may prefer to use std::optional directly. * * ## Example * @code * SPSCRingBuffer buffer; * buffer.try_push("hello"); * * if (auto result = buffer.try_pop()) { * std::cout << result.value() << std::endl; * } * @endcode */ OpResult try_pop() { const size_t currentHead = head_.load(std::memory_order_relaxed); // Check if buffer is empty if (currentHead == tail_.load(std::memory_order_acquire)) { return {}; } T* elem = elementAt(currentHead); OpResult result(std::move(*elem)); elem->~T(); head_.store(increment(currentHead), std::memory_order_release); return result; } /** * @brief Attempts to pop an element into uninitialized storage. * * Similar to try_pop, but uses placement new to construct the element * into the provided storage. This is useful when T is not default-constructible. * * @param[out] storage Pointer to uninitialized storage where the element will be * move-constructed. Must have proper alignment for T. * @return true if an element was successfully popped, false if the buffer was empty. * * @note Only one thread may call this function at any time (single consumer). * @note The caller is responsible for eventually destroying the constructed object. * @note This operation is wait-free. 
* * ## Example * @code * SPSCRingBuffer buffer; * alignas(NonDefaultConstructible) char storage[sizeof(NonDefaultConstructible)]; * if (buffer.try_pop_into(reinterpret_cast(storage))) { * auto* ptr = reinterpret_cast(storage); * // use *ptr * ptr->~NonDefaultConstructible(); * } * @endcode */ bool try_pop_into(T* storage) { const size_t currentHead = head_.load(std::memory_order_relaxed); // Check if buffer is empty if (currentHead == tail_.load(std::memory_order_acquire)) { return false; } T* elem = elementAt(currentHead); new (storage) T(std::move(*elem)); elem->~T(); head_.store(increment(currentHead), std::memory_order_release); return true; } /** * @brief Attempts to push multiple elements into the buffer. * * Pushes as many elements as possible from the range [first, last) into the buffer * in a single atomic tail update. This reduces atomic operation overhead when * pushing multiple items. * * @tparam InputIt Input iterator type. Must dereference to a type convertible to T. * @param first Iterator to the first element to push. * @param last Iterator past the last element to push. * @return The number of elements successfully pushed. May be less than the * range size if the buffer becomes full. * * @note Only one thread may call this function at any time (single producer). * @note Elements are moved from the input range. * * ## Example * @code * SPSCRingBuffer buffer; * std::vector items = {1, 2, 3, 4, 5}; * size_t pushed = buffer.try_push_batch(items.begin(), items.end()); * // pushed contains the number of items successfully added * @endcode */ template size_type try_push_batch(InputIt first, InputIt last) { const size_t currentTail = tail_.load(std::memory_order_relaxed); const size_t currentHead = head_.load(std::memory_order_acquire); // Calculate available space (actual capacity is kBufferSize - 1) size_t available; if (currentTail >= currentHead) { // Tail is ahead of or at head: available = capacity - (tail - head) available = (kBufferSize - 1) - (currentTail - currentHead); } else { // Tail has wrapped: available = head - tail - 1 available = currentHead - currentTail - 1; } if (available == 0) { return 0; } // Push as many items as possible size_t count = 0; size_t tailPos = currentTail; for (; first != last && count < available; ++first, ++count) { new (elementAt(tailPos)) T(std::move(*first)); tailPos = increment(tailPos); } if (count > 0) { tail_.store(tailPos, std::memory_order_release); } return count; } /** * @brief Attempts to pop multiple elements from the buffer. * * Pops up to maxCount elements from the buffer into the output iterator * in a single atomic head update. This reduces atomic operation overhead * when consuming multiple items. * * @tparam OutputIt Output iterator type. Must be assignable from T. * @param dest Output iterator to write popped elements to. * @param maxCount Maximum number of elements to pop. * @return The number of elements successfully popped. May be less than * maxCount if the buffer has fewer elements. * * @note Only one thread may call this function at any time (single consumer). * @note Elements are moved to the output range. * * ## Example * @code * SPSCRingBuffer buffer; * // ... push some items ... 
   *   std::vector items(4);
   *   size_t popped = buffer.try_pop_batch(items.begin(), 4);
   *   // First 'popped' elements of items contain the popped values
   * @endcode
   */
  template <typename OutputIt>
  size_type try_pop_batch(OutputIt dest, size_type maxCount) {
    const size_t currentHead = head_.load(std::memory_order_relaxed);
    const size_t currentTail = tail_.load(std::memory_order_acquire);

    // Calculate available items
    size_t available;
    if (currentTail >= currentHead) {
      available = currentTail - currentHead;
    } else {
      available = kBufferSize - currentHead + currentTail;
    }

    if (available == 0) {
      return 0;
    }

    // Pop as many items as requested and available
    size_t count = std::min(available, maxCount);
    size_t headPos = currentHead;
    for (size_t i = 0; i < count; ++i, ++dest) {
      T* elem = elementAt(headPos);
      *dest = std::move(*elem);
      elem->~T();
      headPos = increment(headPos);
    }

    if (count > 0) {
      head_.store(headPos, std::memory_order_release);
    }
    return count;
  }

  /**
   * @brief Checks if the buffer is empty.
   *
   * @return true if the buffer contains no elements, false otherwise.
   *
   * @note This function provides only a snapshot of the buffer state.
   *       The result may be stale by the time it is used, as another
   *       thread may have pushed or popped an element.
   *
   * @note Safe to call from any thread, but the result is only a hint.
   */
  bool empty() const {
    return head_.load(std::memory_order_acquire) == tail_.load(std::memory_order_acquire);
  }

  /**
   * @brief Checks if the buffer is full.
   *
   * @return true if the buffer has no space for additional elements, false otherwise.
   *
   * @note This function provides only a snapshot of the buffer state.
   *       The result may be stale by the time it is used, as another
   *       thread may have popped an element.
   *
   * @note Safe to call from any thread, but the result is only a hint.
   */
  bool full() const {
    return increment(tail_.load(std::memory_order_acquire)) ==
        head_.load(std::memory_order_acquire);
  }

  /**
   * @brief Returns the current number of elements in the buffer.
   *
   * @return The number of elements currently in the buffer.
   *
   * @note This function provides only a snapshot of the buffer state.
   *       The result may be stale by the time it is used.
   *
   * @note Safe to call from any thread, but the result is only a hint.
   *
   * @note The implementation handles wrap-around correctly using modular
   *       arithmetic.
   */
  size_type size() const {
    const size_t head = head_.load(std::memory_order_acquire);
    const size_t tail = tail_.load(std::memory_order_acquire);
    // Handle wrap-around: if tail < head, we've wrapped
    return (tail >= head) ? (tail - head) : (kBufferSize - head + tail);
  }

  /**
   * @brief Returns the maximum number of elements the buffer can hold.
   *
   * When RoundUpToPowerOfTwo is true (default), this may be larger than the
   * requested Capacity template parameter, as the internal buffer is rounded
   * up to the next power of two for performance.
   *
   * @return The actual maximum capacity of the buffer (kBufferSize - 1).
   *
   * @note The actual storage uses capacity() + 1 slots internally to distinguish
   *       between empty and full states.
   */
  static constexpr size_type capacity() noexcept {
    return kBufferSize - 1;
  }

 private:
  /**
   * @brief Computes the internal buffer size at compile time.
   *
   * When RoundUpToPowerOfTwo is true, rounds (Capacity + 1) up to the next power of two.
   * Otherwise, uses exactly (Capacity + 1).
   */
  static constexpr size_t computeBufferSize() noexcept {
    return RoundUpToPowerOfTwo ? static_cast<size_t>(detail::nextPow2(Capacity + 1))
                               : Capacity + 1;
  }

  /**
   * @brief Internal buffer size (includes one extra slot to distinguish empty from full).
   *
   * The ring buffer uses one slot as a sentinel to distinguish between the
   * empty state (head == tail) and the full state (next(tail) == head).
   * When RoundUpToPowerOfTwo is true, this is rounded up to a power of two.
   */
  static constexpr size_t kBufferSize = computeBufferSize();

  /**
   * @brief Compile-time check if kBufferSize is a power of two.
   *
   * When true, we can use faster bitwise AND instead of modulo for wrap-around.
   */
  static constexpr bool kIsPowerOfTwo = (kBufferSize & (kBufferSize - 1)) == 0;

  /**
   * @brief Mask for power-of-two wrap-around (kBufferSize - 1).
   *
   * Only valid when kIsPowerOfTwo is true.
   */
  static constexpr size_t kMask = kBufferSize - 1;

  /**
   * @brief Increments an index with wrap-around.
   *
   * Uses bitwise AND when buffer size is a power of two (faster),
   * otherwise falls back to modulo.
   *
   * @param index The current index.
   * @return The next index, wrapping to 0 if necessary.
   */
  static constexpr size_t increment(size_t index) noexcept {
    return kIsPowerOfTwo ? ((index + 1) & kMask) : ((index + 1) % kBufferSize);
  }

  /**
   * @brief Returns a pointer to the element at the given index.
   */
  T* elementAt(size_t index) {
    return reinterpret_cast<T*>(&storage_[index * sizeof(T)]);
  }

  const T* elementAt(size_t index) const {
    return reinterpret_cast<const T*>(&storage_[index * sizeof(T)]);
  }

  /// Head index (consumer reads from here). Cache-line aligned to avoid false sharing.
  alignas(kCacheLineSize) std::atomic<size_t> head_{0};

  /// Tail index (producer writes here). Cache-line aligned to avoid false sharing.
  alignas(kCacheLineSize) std::atomic<size_t> tail_{0};

  /// Uninitialized storage for elements. Uses kBufferSize slots (one more than capacity()) to
  /// distinguish empty from full. Elements are constructed via placement new when pushed and
  /// explicitly destroyed when popped.
  alignas(T) char storage_[sizeof(T) * kBufferSize];
};

} // namespace dispenso


================================================
FILE: dispenso/task_set.cpp
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "task_set.h"

#include

namespace dispenso {
namespace detail {

// 64 depth is pretty ridiculous, but try not to step on anyone's feet.
constexpr int32_t kMaxTasksStackSize = 64;
DISPENSO_THREAD_LOCAL TaskSetBase* g_taskStack[kMaxTasksStackSize];
DISPENSO_THREAD_LOCAL int32_t g_taskStackSize = 0;

void pushThreadTaskSet(TaskSetBase* t) {
#ifndef NDEBUG
  if (g_taskStackSize < 0 || g_taskStackSize >= kMaxTasksStackSize) {
    fprintf(stderr, "TaskSet parent stack index is invalid when pushing: %d\n", g_taskStackSize);
    std::abort();
  }
#endif // NDEBUG
  g_taskStack[g_taskStackSize++] = t;
}

void popThreadTaskSet() {
#ifndef NDEBUG
  if (g_taskStackSize <= 0) {
    fprintf(stderr, "TaskSet parent stack index is invalid when popping: %d\n", g_taskStackSize);
    std::abort();
  }
#endif // NDEBUG
  --g_taskStackSize;
}

} // namespace detail

TaskSetBase* parentTaskSet() {
  using namespace detail;
#ifndef NDEBUG
  if (g_taskStackSize < 0 || g_taskStackSize >= kMaxTasksStackSize) {
    fprintf(stderr, "TaskSet parent stack index is invalid when accessing: %d\n", g_taskStackSize);
    std::abort();
  }
#endif // NDEBUG
  return g_taskStackSize ? g_taskStack[g_taskStackSize - 1] : nullptr;
}

void TaskSetBase::trySetCurrentException() {
#if defined(__cpp_exceptions)
  auto status = kUnset;
  if (guardException_.compare_exchange_strong(status, kSetting, std::memory_order_acq_rel)) {
    exception_ = std::current_exception();
    guardException_.store(kSet, std::memory_order_release);
    canceled_.store(true, std::memory_order_release);
  }
#endif // __cpp_exceptions
}

inline bool TaskSetBase::testAndResetException() {
#if defined(__cpp_exceptions)
  if (guardException_.load(std::memory_order_acquire) == kSet) {
    auto exception = std::move(exception_);
    guardException_.store(kUnset, std::memory_order_release);
    std::rethrow_exception(exception);
  }
#endif // __cpp_exceptions
  return canceled_.load(std::memory_order_acquire);
}

bool ConcurrentTaskSet::wait() {
  // Steal work until our set is unblocked. Note that this is not the
  // fastest possible way to unblock the current set, but it will alleviate
  // deadlock, and should provide decent throughput for all waiters.
  // The deadlock scenario mentioned goes as follows: N threads in the
  // ThreadPool. Each thread is running code that is using TaskSets. No
  // progress could be made without stealing.
  while (outstandingTaskCount_.load(std::memory_order_acquire)) {
    if (!pool_.tryExecuteNext()) {
      std::this_thread::yield();
    }
  }
  return testAndResetException();
}

bool ConcurrentTaskSet::tryWait(size_t maxToExecute) {
  while (outstandingTaskCount_.load(std::memory_order_acquire) && maxToExecute--) {
    if (!pool_.tryExecuteNext()) {
      break;
    }
  }

  // Must check completion prior to checking exceptions, otherwise there could be a case where
  // exceptions are checked, then an exception is propagated, and then we return whether all items
  // have been completed, thus dropping the exception.
  if (outstandingTaskCount_.load(std::memory_order_acquire)) {
    return false;
  }
  return !testAndResetException();
}

moodycamel::ProducerToken TaskSet::makeToken(moodycamel::ConcurrentQueue& pool) {
  return moodycamel::ProducerToken(pool);
}

bool TaskSet::wait() {
  // Steal work until our set is unblocked.
  // The deadlock scenario mentioned goes as follows: N threads in the
  // ThreadPool. Each thread is running code that is using TaskSets. No
  // progress could be made without stealing.
  while (pool_.tryExecuteNextFromProducerToken(token_)) {
  }

  while (outstandingTaskCount_.load(std::memory_order_acquire)) {
    if (!pool_.tryExecuteNext()) {
      std::this_thread::yield();
    }
  }
  return testAndResetException();
}

bool TaskSet::tryWait(size_t maxToExecute) {
  ssize_t maxToExe = static_cast<ssize_t>(maxToExecute);
  while (outstandingTaskCount_.load(std::memory_order_acquire) && maxToExe--) {
    if (!pool_.tryExecuteNextFromProducerToken(token_)) {
      break;
    }
  }

  // Must check completion prior to checking exceptions, otherwise there could be a case where
  // exceptions are checked, then an exception is propagated, and then we return whether all items
  // have been completed, thus dropping the exception.
  maxToExe = std::max<ssize_t>(0, maxToExe);
  while (outstandingTaskCount_.load(std::memory_order_acquire) && maxToExe--) {
    if (!pool_.tryExecuteNext()) {
      std::this_thread::yield();
    }
  }

  if (outstandingTaskCount_.load(std::memory_order_acquire)) {
    return false;
  }
  return !testAndResetException();
}

} // namespace dispenso


================================================
FILE: dispenso/task_set.h
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
* * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file task_set.h * @ingroup group_core * A file providing TaskSet and ConcurrentTaskSet. These interfaces allow the user to * submit/schedule multiple closures and then wait on them. **/ #pragma once namespace dispenso { enum class ParentCascadeCancel { kOff, kOn }; } #include namespace dispenso { constexpr ssize_t kDefaultStealingMultiplier = 4; /** * TaskSet is an object that allows scheduling multiple functors to a thread pool, and * allows to wait on that set of tasks. TaskSet supplies more efficient schedule/wait * than ConcurrentTaskSet, but at the expense of only being usable from one thread at a * time. * * TaskSet is "thread-compatible". This means that you can safely use * different TaskSet objects on different threads concurrently. Any given * TaskSet object may only be used from a single thread, so no concurrent use of that * object is allowed. **/ class TaskSet : public TaskSetBase { public: /** * Construct a TaskSet with the given backing pool. * * @param p The backing pool for this TaskSet * @param registerForParentCancel Whether to register for parent cancellation cascade. * @param stealingLoadMultiplier An over-load factor. If this factor of load is reached by the * underlying pool, scheduled tasks may run immediately in the calling thread. **/ TaskSet( ThreadPool& p, ParentCascadeCancel registerForParentCancel, ssize_t stealingLoadMultiplier = kDefaultStealingMultiplier) : TaskSetBase(p, registerForParentCancel, stealingLoadMultiplier), token_(makeToken(p.work_)) {} /** Construct a TaskSet with default options. @param p The backing pool. */ TaskSet(ThreadPool& p) : TaskSet(p, ParentCascadeCancel::kOff, kDefaultStealingMultiplier) {} /** Construct a TaskSet with custom load multiplier. @param p The backing pool. @param * stealingLoadMultiplier The over-load factor. */ TaskSet(ThreadPool& p, ssize_t stealingLoadMultiplier) : TaskSet(p, ParentCascadeCancel::kOff, stealingLoadMultiplier) {} TaskSet(TaskSet&& other) = delete; TaskSet& operator=(TaskSet&& other) = delete; /** * Schedule a functor for execution on the underlying pool. If the load on the * underlying pool is high, immediate inline execution may occur on the current thread. * * @param f A functor matching signature void(). Best performance will come from * passing lambdas, other concrete functors, or OnceFunction, but * std::function or similarly type-erased objects will also work. * * @note If f can throw exceptions, then schedule may throw if the task * is run inline. Otherwise, exceptions will be caught on the running thread and best-effort * propagated to the ConcurrentTaskSet, where the first one from the set is rethrown * in wait. **/ template DISPENSO_REQUIRES(OnceCallableFunc) void schedule(F&& f) { if (DISPENSO_EXPECT(canceled(), false)) { return; } if (outstandingTaskCount_.load(std::memory_order_relaxed) > taskSetLoadFactor_) { f(); } else { pool_.schedule(token_, packageTask(std::forward(f))); } } /** * Schedule a functor for execution on the underlying pool. * * @param f A functor matching signature void(). Best performance will come from * passing lambdas, other concrete functors, or OnceFunction, but * std::function or similarly type-erased objects will also work. * @param fq Tag to force queuing instead of potential inline execution. 
* * @note If f can throw exceptions, then exceptions will be caught on the running * thread and best-effort propagated to the ConcurrentTaskSet, where the first one * from the set is rethrown in wait. **/ template DISPENSO_REQUIRES(OnceCallableFunc) void schedule(F&& f, ForceQueuingTag fq) { pool_.schedule(token_, packageTask(std::forward(f)), fq); } /** * Schedule multiple functors for execution on the underlying pool in bulk. * This is more efficient than calling schedule() multiple times when you have many tasks to * submit, as it reduces atomic contention and allows for better thread wakeup behavior. * * @param count The number of functors to schedule. * @param gen A generator functor that takes an index and returns a functor to execute. * gen(i) will be called for i in [0, count) to produce each task. * * @note Work is processed in chunks, interleaving enqueue and inline execution based on the * task set's load factor, preventing both pool thread starvation and excessive overhead. **/ template void scheduleBulk(size_t count, Generator&& gen) { scheduleBulkImpl(count, std::forward(gen), &token_); } /** * Wait for all currently scheduled functors to finish execution. If exceptions are thrown * during execution of the set of tasks, wait will propagate the first exception. * * @return true if the TaskSet was canceled, false otherwise **/ DISPENSO_DLL_ACCESS bool wait(); /** * See if the currently scheduled functors can be completed while stealing and executing at most * maxToExecute of them from the pool. If not used in conjunction with wait, there * may be cases that tryWait must be called multiple times with * maxToExecute > 0 to prevent livelock/deadlock. If exceptions have been * propagated since the last call to wait or tryWait, * tryWait will propagate the first of them. * * @param maxToExecute The maximum number of tasks to proactively execute on the current thread. * * @return true if all currently scheduled functors have been completed prior to * returning, and false otherwise. This includes returning false if the TaskSet was * cancelled. **/ DISPENSO_DLL_ACCESS bool tryWait(size_t maxToExecute); /** * Set the TaskSet to canceled state. No unexecuted tasks will execute once this is set. * Already executing tasks may check canceled() status to exit early. * **/ void cancel() { TaskSetBase::cancel(); } /** * Check the canceled status of the TaskSet. * * @return a boolean indicating whether or not the TaskSet has been canceled. **/ bool canceled() const { return TaskSetBase::canceled(); } /** * Destroy the TaskSet, first waiting for all currently scheduled functors to * finish execution. **/ ~TaskSet() { wait(); } private: DISPENSO_DLL_ACCESS moodycamel::ProducerToken makeToken( moodycamel::ConcurrentQueue& pool); moodycamel::ProducerToken token_; template friend class detail::FutureBase; }; /** * ConcurrentTaskSet fulfills the same API as TaskSet with one minor * difference: It may be used to schedule tasks concurrently from multiple threads (see more below). * It is an object that allows scheduling multiple function-like objects to a thread pool, and * allows to wait on that set of tasks. * * ConcurrentTaskSet is "thread-compatible". This means that you can safely use * different ConcurrentTaskSet objects on different threads concurrently. * ConcurrentTaskSet also allows multiple threads to concurrently schedule against it. * It is an error to call wait() concurrently with schedule() on the same * ConcurrentTaskSet. 
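 *
 * As a minimal usage sketch (illustrative only, not taken from the original
 * documentation; it assumes the pool returned by dispenso::globalThreadPool()):
 *
 * @code
 *   dispenso::ConcurrentTaskSet tasks(dispenso::globalThreadPool());
 *   std::atomic<int> sum{0};
 *   for (int i = 0; i < 8; ++i) {
 *     tasks.schedule([i, &sum]() { sum.fetch_add(i, std::memory_order_relaxed); });
 *   }
 *   tasks.wait(); // all scheduled tasks have finished (or the first exception is rethrown)
 * @endcode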
*/ class ConcurrentTaskSet : public TaskSetBase { public: /** * Construct a ConcurrentTaskSet with the given backing pool. * * @param pool The backing pool for this ConcurrentTaskSet * @param registerForParentCancel Whether to register for parent cancellation cascade. * @param stealingLoadMultiplier An over-load factor. If this factor of load is reached by the * underlying pool, scheduled tasks may run immediately in the calling thread. **/ ConcurrentTaskSet( ThreadPool& pool, ParentCascadeCancel registerForParentCancel, ssize_t stealingLoadMultiplier = kDefaultStealingMultiplier) : TaskSetBase(pool, registerForParentCancel, stealingLoadMultiplier) {} /** Construct a ConcurrentTaskSet with default options. @param p The backing pool. */ ConcurrentTaskSet(ThreadPool& p) : ConcurrentTaskSet(p, ParentCascadeCancel::kOff, kDefaultStealingMultiplier) {} /** Construct a ConcurrentTaskSet with custom load multiplier. @param p The backing pool. @param * stealingLoadMultiplier The over-load factor. */ ConcurrentTaskSet(ThreadPool& p, ssize_t stealingLoadMultiplier) : ConcurrentTaskSet(p, ParentCascadeCancel::kOff, stealingLoadMultiplier) {} ConcurrentTaskSet(ConcurrentTaskSet&& other) = delete; ConcurrentTaskSet& operator=(ConcurrentTaskSet&& other) = delete; /** * Schedule a functor for execution on the underlying pool. If the load on the * underlying pool is high, immediate inline execution may occur on the current thread. * * @param f A functor matching signature void(). Best performance will come from * passing lambdas, other concrete functors, or OnceFunction, but * std::function or similarly type-erased objects will also work. * * @param skipRecheck A poweruser knob that says that if we don't have enough outstanding tasks to * immediately work steal, we should bypass the similar check in the ThreadPool. * * @note If f can throw exceptions, then schedule may throw if the task * is run inline. Otherwise, exceptions will be caught on the running thread and best-effort * propagated to the ConcurrentTaskSet, where the first one from the set is rethrown * in wait. **/ template DISPENSO_REQUIRES(OnceCallableFunc) void schedule(F&& f, bool skipRecheck = false) { if (outstandingTaskCount_.load(std::memory_order_relaxed) > taskSetLoadFactor_ && DISPENSO_EXPECT(!canceled(), true)) { f(); } else if (skipRecheck) { pool_.schedule(packageTask(std::forward(f)), ForceQueuingTag()); } else { pool_.schedule(packageTask(std::forward(f))); } } /** * Schedule a functor for execution on the underlying pool. * * @param f A functor matching signature void(). Best performance will come from * passing lambdas, other concrete functors, or OnceFunction, but * std::function or similarly type-erased objects will also work. * @param fq Tag to force queuing instead of potential inline execution. * * @note If f can throw exceptions, then exceptions will be caught on the running * thread and best-effort propagated to the ConcurrentTaskSet, where the first one * from the set is rethrown in wait. **/ template DISPENSO_REQUIRES(OnceCallableFunc) void schedule(F&& f, ForceQueuingTag fq) { pool_.schedule(packageTask(std::forward(f)), fq); } /** * Schedule multiple functors for execution on the underlying pool in bulk. * This is more efficient than calling schedule() multiple times when you have many tasks to * submit, as it reduces atomic contention and allows for better thread wakeup behavior. * * @param count The number of functors to schedule. * @param gen A generator functor that takes an index and returns a functor to execute. 
* gen(i) will be called for i in [0, count) to produce each task. * * @note Work is processed in chunks, interleaving enqueue and inline execution based on the * task set's load factor, preventing both pool thread starvation and excessive overhead. **/ template void scheduleBulk(size_t count, Generator&& gen) { scheduleBulkImpl(count, std::forward(gen), nullptr); } /** * Wait for all currently scheduled functors to finish execution. If exceptions are thrown * during execution of the set of tasks, wait will propagate the first exception. **/ DISPENSO_DLL_ACCESS bool wait(); /** * See if the currently scheduled functors can be completed while stealing and executing at most * maxToExecute of them from the pool. If not used in conjunction with wait, there * may be cases that tryWait must be called multiple times with * maxToExecute > 0 to prevent livelock/deadlock. If exceptions have been * propagated since the last call to wait or tryWait, * tryWait will propagate the first of them. * * @param maxToExecute The maximum number of tasks to proactively execute on the current thread. * * @return true if all currently scheduled functors have been completed prior to * returning, and false otherwise (including cancelled cases). **/ DISPENSO_DLL_ACCESS bool tryWait(size_t maxToExecute); /** * Set the ConcurrentTaskSet to canceled state. No unexecuted tasks will execute once this is * set. Already executing tasks may check canceled() status to exit early. * * @note This will be reset automatically by wait. **/ void cancel() { TaskSetBase::cancel(); } /** * Check the canceled status of the ConcurrentTaskSet. * * @return a boolean indicating whether or not the ConcurrentTaskSet has been canceled. **/ bool canceled() const { return TaskSetBase::canceled(); } /** * Destroy the ConcurrentTaskSet, first waiting for all currently scheduled functors to * finish execution. **/ ~ConcurrentTaskSet() { wait(); } private: bool tryExecuteNext() { return pool_.tryExecuteNext(); } template friend class detail::FutureBase; friend class detail::LimitGatedScheduler; }; /** * Get access to the parent task set that scheduled the currently running code. nullptr if called * outside the context of a (Concurrent)TaskSet schedule. * **/ DISPENSO_DLL_ACCESS TaskSetBase* parentTaskSet(); } // namespace dispenso ================================================ FILE: dispenso/third-party/moodycamel/LICENSE.md ================================================ This license file applies to everything in this repository except that which is explicitly annotated as being written by other authors, i.e. the Boost queue (included in the benchmarks for comparison), Intel's TBB library (ditto), dlib::pipe (ditto), the CDSChecker tool (used for verification), the Relacy model checker (ditto), and Jeff Preshing's semaphore implementation (used in the blocking queue) which has a zlib license (embedded in lightweightsempahore.h). --- Simplified BSD License: Copyright (c) 2013-2016, Cameron Desrochers. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. --- I have also chosen to dual-license under the Boost Software License as an alternative to the Simplified BSD license above: Boost Software License - Version 1.0 - August 17th, 2003 Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: dispenso/third-party/moodycamel/README.txt ================================================ https://github.com/cameron314/concurrentqueue commit 65d6970912fc3f6bb62d80edf95ca30e0df85137 (HEAD -> master, origin/master, origin/HEAD) Merge: d49fa2b 08dcafc Author: Cameron Date: Sun Jul 24 10:02:12 2022 -0400 Merge pull request #308 from r8bhavneet/master Update README.md commit 08dcafcd131b46e1a63abdc9b5f73c852193edca Author: r8bhavneet <98200254+r8bhavneet@users.noreply.github.com> Date: Sun Jul 24 02:35:57 2022 -0700 Update README.md Hey, I really liked the project and was reading through the Readme.md file when I came across some redundant words and phrases which you might have missed whil e editing the documentation. It would be really a great opportunity for me if I could contribute to this project. Thank you. 
commit d49fa2b0bd1c6185d93509f48c8987f9759d7238 Merge: 0a40449 9dc1b2c Author: Cameron Date: Mon May 9 07:43:29 2022 -0400 Merge pull request #296 from MathiasMagnus/fix-c4554 Proper MSVC warning fix and note commit 9dc1b2cfcad03b4ee22ea57ddb5c453c41c19ac9 Author: Máté Ferenc Nagy-Egri Date: Mon May 9 13:19:39 2022 +0200 Proper MSVC warning fix and note commit 0a404492ac2c0bba0f62eb2b859ec152e494f8bf Author: Cameron Date: Sat May 7 12:04:00 2022 -0400 Attempt to resolve -Wsign-conversion warnings in concurrentqueue.h (see #294) commit 22c78daf65d2c8cce9399a29171676054aa98807 Merge: c52e5ef 263c55d Author: Cameron Date: Sun Mar 20 15:16:30 2022 -0400 Merge pull request #290 from usurai/master Fix link in README commit 263c55d5c95545abee1ef25662c752c5296d7c34 Author: usurai Date: Thu Mar 17 16:09:14 2022 +0800 Fix link in README ================================================ FILE: dispenso/third-party/moodycamel/blockingconcurrentqueue.h ================================================ // Provides an efficient blocking version of moodycamel::ConcurrentQueue. // ©2015-2020 Cameron Desrochers. Distributed under the terms of the simplified // BSD license, available at the top of concurrentqueue.h. // Also dual-licensed under the Boost Software License (see LICENSE.md) // Uses Jeff Preshing's semaphore implementation (under the terms of its // separate zlib license, see lightweightsemaphore.h). #pragma once #include "concurrentqueue.h" #include "lightweightsemaphore.h" #include #include #include #include #include namespace moodycamel { // This is a blocking version of the queue. It has an almost identical interface to // the normal non-blocking version, with the addition of various wait_dequeue() methods // and the removal of producer-specific dequeue methods. template class BlockingConcurrentQueue { private: typedef ::moodycamel::ConcurrentQueue ConcurrentQueue; typedef ::moodycamel::LightweightSemaphore LightweightSemaphore; public: typedef typename ConcurrentQueue::producer_token_t producer_token_t; typedef typename ConcurrentQueue::consumer_token_t consumer_token_t; typedef typename ConcurrentQueue::index_t index_t; typedef typename ConcurrentQueue::size_t size_t; typedef typename std::make_signed::type ssize_t; static const size_t BLOCK_SIZE = ConcurrentQueue::BLOCK_SIZE; static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = ConcurrentQueue::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD; static const size_t EXPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::EXPLICIT_INITIAL_INDEX_SIZE; static const size_t IMPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::IMPLICIT_INITIAL_INDEX_SIZE; static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = ConcurrentQueue::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = ConcurrentQueue::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE; static const size_t MAX_SUBQUEUE_SIZE = ConcurrentQueue::MAX_SUBQUEUE_SIZE; public: // Creates a queue with at least `capacity` element slots; note that the // actual number of elements that can be inserted without additional memory // allocation depends on the number of producers and the block size (e.g. if // the block size is equal to `capacity`, only a single block will be allocated // up-front, which means only a single producer will be able to enqueue elements // without an extra allocation -- blocks aren't shared between producers). 
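	// (Illustrative sketch, not part of the upstream comment: default construction and
	// basic blocking use, assuming a queue of int.
	//     moodycamel::BlockingConcurrentQueue<int> q;  // capacity defaults to 6 * BLOCK_SIZE
	//     q.enqueue(42);
	//     int v;
	//     q.wait_dequeue(v);  // blocks until an item is available
	// )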
// This method is not thread safe -- it is up to the user to ensure that the // queue is fully constructed before it starts being used by other threads (this // includes making the memory effects of construction visible, possibly with a // memory barrier). explicit BlockingConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE) : inner(capacity), sema(create(0, (int)Traits::MAX_SEMA_SPINS), &BlockingConcurrentQueue::template destroy) { assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); if (!sema) { MOODYCAMEL_THROW(std::bad_alloc()); } } BlockingConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) : inner(minCapacity, maxExplicitProducers, maxImplicitProducers), sema(create(0, (int)Traits::MAX_SEMA_SPINS), &BlockingConcurrentQueue::template destroy) { assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); if (!sema) { MOODYCAMEL_THROW(std::bad_alloc()); } } // Disable copying and copy assignment BlockingConcurrentQueue(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; BlockingConcurrentQueue& operator=(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; // Moving is supported, but note that it is *not* a thread-safe operation. // Nobody can use the queue while it's being moved, and the memory effects // of that move must be propagated to other threads before they can use it. // Note: When a queue is moved, its tokens are still valid but can only be // used with the destination queue (i.e. semantically they are moved along // with the queue itself). BlockingConcurrentQueue(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT : inner(std::move(other.inner)), sema(std::move(other.sema)) { } inline BlockingConcurrentQueue& operator=(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT { return swap_internal(other); } // Swaps this queue's state with the other's. Not thread-safe. // Swapping two queues does not invalidate their tokens, however // the tokens that were created for one queue must be used with // only the swapped queue (i.e. the tokens are tied to the // queue's movable state, not the object itself). inline void swap(BlockingConcurrentQueue& other) MOODYCAMEL_NOEXCEPT { swap_internal(other); } private: BlockingConcurrentQueue& swap_internal(BlockingConcurrentQueue& other) { if (this == &other) { return *this; } inner.swap(other.inner); sema.swap(other.sema); return *this; } public: // Enqueues a single item (by copying it). // Allocates memory if required. Only fails if memory allocation fails (or implicit // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). // Thread-safe. inline bool enqueue(T const& item) { if ((details::likely)(inner.enqueue(item))) { sema->signal(); return true; } return false; } // Enqueues a single item (by moving it, if possible). // Allocates memory if required. Only fails if memory allocation fails (or implicit // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). // Thread-safe. 
inline bool enqueue(T&& item) { if ((details::likely)(inner.enqueue(std::move(item)))) { sema->signal(); return true; } return false; } // Enqueues a single item (by copying it) using an explicit producer token. // Allocates memory if required. Only fails if memory allocation fails (or // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). // Thread-safe. inline bool enqueue(producer_token_t const& token, T const& item) { if ((details::likely)(inner.enqueue(token, item))) { sema->signal(); return true; } return false; } // Enqueues a single item (by moving it, if possible) using an explicit producer token. // Allocates memory if required. Only fails if memory allocation fails (or // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). // Thread-safe. inline bool enqueue(producer_token_t const& token, T&& item) { if ((details::likely)(inner.enqueue(token, std::move(item)))) { sema->signal(); return true; } return false; } // Enqueues several items. // Allocates memory if required. Only fails if memory allocation fails (or // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). // Note: Use std::make_move_iterator if the elements should be moved instead of copied. // Thread-safe. template inline bool enqueue_bulk(It itemFirst, size_t count) { if ((details::likely)(inner.enqueue_bulk(std::forward(itemFirst), count))) { sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); return true; } return false; } // Enqueues several items using an explicit producer token. // Allocates memory if required. Only fails if memory allocation fails // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). // Note: Use std::make_move_iterator if the elements should be moved // instead of copied. // Thread-safe. template inline bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) { if ((details::likely)(inner.enqueue_bulk(token, std::forward(itemFirst), count))) { sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); return true; } return false; } // Enqueues a single item (by copying it). // Does not allocate memory. Fails if not enough room to enqueue (or implicit // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE // is 0). // Thread-safe. inline bool try_enqueue(T const& item) { if (inner.try_enqueue(item)) { sema->signal(); return true; } return false; } // Enqueues a single item (by moving it, if possible). // Does not allocate memory (except for one-time implicit producer). // Fails if not enough room to enqueue (or implicit production is // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). // Thread-safe. inline bool try_enqueue(T&& item) { if (inner.try_enqueue(std::move(item))) { sema->signal(); return true; } return false; } // Enqueues a single item (by copying it) using an explicit producer token. // Does not allocate memory. Fails if not enough room to enqueue. // Thread-safe. inline bool try_enqueue(producer_token_t const& token, T const& item) { if (inner.try_enqueue(token, item)) { sema->signal(); return true; } return false; } // Enqueues a single item (by moving it, if possible) using an explicit producer token. // Does not allocate memory. Fails if not enough room to enqueue. // Thread-safe. 
inline bool try_enqueue(producer_token_t const& token, T&& item) { if (inner.try_enqueue(token, std::move(item))) { sema->signal(); return true; } return false; } // Enqueues several items. // Does not allocate memory (except for one-time implicit producer). // Fails if not enough room to enqueue (or implicit production is // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). // Note: Use std::make_move_iterator if the elements should be moved // instead of copied. // Thread-safe. template inline bool try_enqueue_bulk(It itemFirst, size_t count) { if (inner.try_enqueue_bulk(std::forward(itemFirst), count)) { sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); return true; } return false; } // Enqueues several items using an explicit producer token. // Does not allocate memory. Fails if not enough room to enqueue. // Note: Use std::make_move_iterator if the elements should be moved // instead of copied. // Thread-safe. template inline bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) { if (inner.try_enqueue_bulk(token, std::forward(itemFirst), count)) { sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); return true; } return false; } // Attempts to dequeue from the queue. // Returns false if all producer streams appeared empty at the time they // were checked (so, the queue is likely but not guaranteed to be empty). // Never allocates. Thread-safe. template inline bool try_dequeue(U& item) { if (sema->tryWait()) { while (!inner.try_dequeue(item)) { continue; } return true; } return false; } // Attempts to dequeue from the queue using an explicit consumer token. // Returns false if all producer streams appeared empty at the time they // were checked (so, the queue is likely but not guaranteed to be empty). // Never allocates. Thread-safe. template inline bool try_dequeue(consumer_token_t& token, U& item) { if (sema->tryWait()) { while (!inner.try_dequeue(token, item)) { continue; } return true; } return false; } // Attempts to dequeue several elements from the queue. // Returns the number of items actually dequeued. // Returns 0 if all producer streams appeared empty at the time they // were checked (so, the queue is likely but not guaranteed to be empty). // Never allocates. Thread-safe. template inline size_t try_dequeue_bulk(It itemFirst, size_t max) { size_t count = 0; max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); while (count != max) { count += inner.template try_dequeue_bulk(itemFirst, max - count); } return count; } // Attempts to dequeue several elements from the queue using an explicit consumer token. // Returns the number of items actually dequeued. // Returns 0 if all producer streams appeared empty at the time they // were checked (so, the queue is likely but not guaranteed to be empty). // Never allocates. Thread-safe. template inline size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) { size_t count = 0; max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); while (count != max) { count += inner.template try_dequeue_bulk(token, itemFirst, max - count); } return count; } // Blocks the current thread until there's something to dequeue, then // dequeues it. // Never allocates. Thread-safe. 
template inline void wait_dequeue(U& item) { while (!sema->wait()) { continue; } while (!inner.try_dequeue(item)) { continue; } } // Blocks the current thread until either there's something to dequeue // or the timeout (specified in microseconds) expires. Returns false // without setting `item` if the timeout expires, otherwise assigns // to `item` and returns true. // Using a negative timeout indicates an indefinite timeout, // and is thus functionally equivalent to calling wait_dequeue. // Never allocates. Thread-safe. template inline bool wait_dequeue_timed(U& item, std::int64_t timeout_usecs) { if (!sema->wait(timeout_usecs)) { return false; } while (!inner.try_dequeue(item)) { continue; } return true; } // Blocks the current thread until either there's something to dequeue // or the timeout expires. Returns false without setting `item` if the // timeout expires, otherwise assigns to `item` and returns true. // Never allocates. Thread-safe. template inline bool wait_dequeue_timed(U& item, std::chrono::duration const& timeout) { return wait_dequeue_timed(item, std::chrono::duration_cast(timeout).count()); } // Blocks the current thread until there's something to dequeue, then // dequeues it using an explicit consumer token. // Never allocates. Thread-safe. template inline void wait_dequeue(consumer_token_t& token, U& item) { while (!sema->wait()) { continue; } while (!inner.try_dequeue(token, item)) { continue; } } // Blocks the current thread until either there's something to dequeue // or the timeout (specified in microseconds) expires. Returns false // without setting `item` if the timeout expires, otherwise assigns // to `item` and returns true. // Using a negative timeout indicates an indefinite timeout, // and is thus functionally equivalent to calling wait_dequeue. // Never allocates. Thread-safe. template inline bool wait_dequeue_timed(consumer_token_t& token, U& item, std::int64_t timeout_usecs) { if (!sema->wait(timeout_usecs)) { return false; } while (!inner.try_dequeue(token, item)) { continue; } return true; } // Blocks the current thread until either there's something to dequeue // or the timeout expires. Returns false without setting `item` if the // timeout expires, otherwise assigns to `item` and returns true. // Never allocates. Thread-safe. template inline bool wait_dequeue_timed(consumer_token_t& token, U& item, std::chrono::duration const& timeout) { return wait_dequeue_timed(token, item, std::chrono::duration_cast(timeout).count()); } // Attempts to dequeue several elements from the queue. // Returns the number of items actually dequeued, which will // always be at least one (this method blocks until the queue // is non-empty) and at most max. // Never allocates. Thread-safe. template inline size_t wait_dequeue_bulk(It itemFirst, size_t max) { size_t count = 0; max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max); while (count != max) { count += inner.template try_dequeue_bulk(itemFirst, max - count); } return count; } // Attempts to dequeue several elements from the queue. // Returns the number of items actually dequeued, which can // be 0 if the timeout expires while waiting for elements, // and at most max. // Using a negative timeout indicates an indefinite timeout, // and is thus functionally equivalent to calling wait_dequeue_bulk. // Never allocates. Thread-safe. 
template inline size_t wait_dequeue_bulk_timed(It itemFirst, size_t max, std::int64_t timeout_usecs) { size_t count = 0; max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, timeout_usecs); while (count != max) { count += inner.template try_dequeue_bulk(itemFirst, max - count); } return count; } // Attempts to dequeue several elements from the queue. // Returns the number of items actually dequeued, which can // be 0 if the timeout expires while waiting for elements, // and at most max. // Never allocates. Thread-safe. template inline size_t wait_dequeue_bulk_timed(It itemFirst, size_t max, std::chrono::duration const& timeout) { return wait_dequeue_bulk_timed(itemFirst, max, std::chrono::duration_cast(timeout).count()); } // Attempts to dequeue several elements from the queue using an explicit consumer token. // Returns the number of items actually dequeued, which will // always be at least one (this method blocks until the queue // is non-empty) and at most max. // Never allocates. Thread-safe. template inline size_t wait_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) { size_t count = 0; max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max); while (count != max) { count += inner.template try_dequeue_bulk(token, itemFirst, max - count); } return count; } // Attempts to dequeue several elements from the queue using an explicit consumer token. // Returns the number of items actually dequeued, which can // be 0 if the timeout expires while waiting for elements, // and at most max. // Using a negative timeout indicates an indefinite timeout, // and is thus functionally equivalent to calling wait_dequeue_bulk. // Never allocates. Thread-safe. template inline size_t wait_dequeue_bulk_timed(consumer_token_t& token, It itemFirst, size_t max, std::int64_t timeout_usecs) { size_t count = 0; max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, timeout_usecs); while (count != max) { count += inner.template try_dequeue_bulk(token, itemFirst, max - count); } return count; } // Attempts to dequeue several elements from the queue using an explicit consumer token. // Returns the number of items actually dequeued, which can // be 0 if the timeout expires while waiting for elements, // and at most max. // Never allocates. Thread-safe. template inline size_t wait_dequeue_bulk_timed(consumer_token_t& token, It itemFirst, size_t max, std::chrono::duration const& timeout) { return wait_dequeue_bulk_timed(token, itemFirst, max, std::chrono::duration_cast(timeout).count()); } // Returns an estimate of the total number of elements currently in the queue. This // estimate is only accurate if the queue has completely stabilized before it is called // (i.e. all enqueue and dequeue operations have completed and their memory effects are // visible on the calling thread, and no further operations start while this method is // being called). // Thread-safe. inline size_t size_approx() const { return (size_t)sema->availableApprox(); } // Returns true if the underlying atomic variables used by // the queue are lock-free (they should be on most platforms). // Thread-safe. static constexpr bool is_lock_free() { return ConcurrentQueue::is_lock_free(); } private: template static inline U* create(A1&& a1, A2&& a2) { void* p = (Traits::malloc)(sizeof(U)); return p != nullptr ? 
new (p) U(std::forward(a1), std::forward(a2)) : nullptr; } template static inline void destroy(U* p) { if (p != nullptr) { p->~U(); } (Traits::free)(p); } private: ConcurrentQueue inner; std::unique_ptr sema; }; template inline void swap(BlockingConcurrentQueue& a, BlockingConcurrentQueue& b) MOODYCAMEL_NOEXCEPT { a.swap(b); } } // end namespace moodycamel ================================================ FILE: dispenso/third-party/moodycamel/concurrentqueue.h ================================================ // Provides a C++11 implementation of a multi-producer, multi-consumer lock-free queue. // An overview, including benchmark results, is provided here: // http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++ // The full design is also described in excruciating detail at: // http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue // Simplified BSD license: // Copyright (c) 2013-2020, Cameron Desrochers. // All rights reserved. // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // - Redistributions of source code must retain the above copyright notice, this list of // conditions and the following disclaimer. // - Redistributions in binary form must reproduce the above copyright notice, this list of // conditions and the following disclaimer in the documentation and/or other materials // provided with the distribution. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL // THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT // OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR // TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Also dual-licensed under the Boost Software License (see LICENSE.md) #pragma once #if defined(__GNUC__) && !defined(__INTEL_COMPILER) // Disable -Wconversion warnings (spuriously triggered when Traits::size_t and // Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings // upon assigning any computed values) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wconversion" #if defined(__clang__) #pragma GCC diagnostic ignored "-Wglobal-constructors" #endif //__clang__ #ifdef MCDBGQ_USE_RELACY #pragma GCC diagnostic ignored "-Wint-to-pointer-cast" #endif #endif #if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) // VS2019 with /W4 warns about constant conditional expressions but unless /std=c++17 or higher // does not support `if constexpr`, so we have no choice but to simply disable the warning #pragma warning(push) #pragma warning(disable: 4127) // conditional expression is constant #endif #if defined(__APPLE__) #include "TargetConditionals.h" #endif #ifdef MCDBGQ_USE_RELACY #include "relacy/relacy_std.hpp" #include "relacy_shims.h" // We only use malloc/free anyway, and the delete macro messes up `= delete` method declarations. // We'll override the default trait malloc ourselves without a macro. 
#undef new #undef delete #undef malloc #undef free #else #include // Requires C++11. Sorry VS2010. #include #endif #include // for max_align_t #include #include #include #include #include #include #include // for CHAR_BIT #include #include // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading #include // used for thread exit synchronization // Platform-specific definitions of a numeric thread ID type and an invalid value namespace moodycamel { namespace details { template struct thread_id_converter { typedef thread_id_t thread_id_numeric_size_t; typedef thread_id_t thread_id_hash_t; static thread_id_hash_t prehash(thread_id_t const& x) { return x; } }; } } #if defined(MCDBGQ_USE_RELACY) namespace moodycamel { namespace details { typedef std::uint32_t thread_id_t; static const thread_id_t invalid_thread_id = 0xFFFFFFFFU; static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU; static inline thread_id_t thread_id() { return rl::thread_index(); } } } #elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__) // No sense pulling in windows.h in a header, we'll manually declare the function // we use and rely on backwards-compatibility for this not to break extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void); namespace moodycamel { namespace details { static_assert(sizeof(unsigned long) == sizeof(std::uint32_t), "Expected size of unsigned long to be 32 bits on Windows"); typedef std::uint32_t thread_id_t; static const thread_id_t invalid_thread_id = 0; // See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU; // Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4. static inline thread_id_t thread_id() { return static_cast(::GetCurrentThreadId()); } } } #elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) || defined(MOODYCAMEL_NO_THREAD_LOCAL) namespace moodycamel { namespace details { static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes"); typedef std::thread::id thread_id_t; static const thread_id_t invalid_thread_id; // Default ctor creates invalid ID // Note we don't define a invalid_thread_id2 since std::thread::id doesn't have one; it's // only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined anyway, which it won't // be. 
static inline thread_id_t thread_id() { return std::this_thread::get_id(); } template struct thread_id_size { }; template<> struct thread_id_size<4> { typedef std::uint32_t numeric_t; }; template<> struct thread_id_size<8> { typedef std::uint64_t numeric_t; }; template<> struct thread_id_converter { typedef thread_id_size::numeric_t thread_id_numeric_size_t; #ifndef __APPLE__ typedef std::size_t thread_id_hash_t; #else typedef thread_id_numeric_size_t thread_id_hash_t; #endif static thread_id_hash_t prehash(thread_id_t const& x) { #ifndef __APPLE__ return std::hash()(x); #else return *reinterpret_cast(&x); #endif } }; } } #else // Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475 // In order to get a numeric thread ID in a platform-independent way, we use a thread-local // static variable's address as a thread identifier :-) #if defined(__GNUC__) || defined(__INTEL_COMPILER) #define MOODYCAMEL_THREADLOCAL __thread #elif defined(_MSC_VER) #define MOODYCAMEL_THREADLOCAL __declspec(thread) #else // Assume C++11 compliant compiler #define MOODYCAMEL_THREADLOCAL thread_local #endif namespace moodycamel { namespace details { typedef std::uintptr_t thread_id_t; static const thread_id_t invalid_thread_id = 0; // Address can't be nullptr static const thread_id_t invalid_thread_id2 = 1; // Member accesses off a null pointer are also generally invalid. Plus it's not aligned. inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast(&x); } } } #endif // Constexpr if #ifndef MOODYCAMEL_CONSTEXPR_IF #if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || __cplusplus > 201402L #define MOODYCAMEL_CONSTEXPR_IF if constexpr #define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]] #else #define MOODYCAMEL_CONSTEXPR_IF if #define MOODYCAMEL_MAYBE_UNUSED #endif #endif // Exceptions #ifndef MOODYCAMEL_EXCEPTIONS_ENABLED #if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__)) #define MOODYCAMEL_EXCEPTIONS_ENABLED #endif #endif #ifdef MOODYCAMEL_EXCEPTIONS_ENABLED #define MOODYCAMEL_TRY try #define MOODYCAMEL_CATCH(...) catch(__VA_ARGS__) #define MOODYCAMEL_RETHROW throw #define MOODYCAMEL_THROW(expr) throw (expr) #else #define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF (true) #define MOODYCAMEL_CATCH(...) else MOODYCAMEL_CONSTEXPR_IF (false) #define MOODYCAMEL_RETHROW #define MOODYCAMEL_THROW(expr) #endif #ifndef MOODYCAMEL_NOEXCEPT #if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED) #define MOODYCAMEL_NOEXCEPT #define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true #define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true #elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800 // VS2012's std::is_nothrow_[move_]constructible is broken and returns true when it shouldn't :-( // We have to assume *all* non-trivial constructors may throw on VS2012! #define MOODYCAMEL_NOEXCEPT _NOEXCEPT #define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference::value && std::is_move_constructible::value ? std::is_trivially_move_constructible::value : std::is_trivially_copy_constructible::value) #define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference::value && std::is_move_assignable::value ? 
std::is_trivially_move_assignable::value || std::is_nothrow_move_assignable::value : std::is_trivially_copy_assignable::value || std::is_nothrow_copy_assignable::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)) #elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900 #define MOODYCAMEL_NOEXCEPT _NOEXCEPT #define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) (std::is_rvalue_reference::value && std::is_move_constructible::value ? std::is_trivially_move_constructible::value || std::is_nothrow_move_constructible::value : std::is_trivially_copy_constructible::value || std::is_nothrow_copy_constructible::value) #define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) ((std::is_rvalue_reference::value && std::is_move_assignable::value ? std::is_trivially_move_assignable::value || std::is_nothrow_move_assignable::value : std::is_trivially_copy_assignable::value || std::is_nothrow_copy_assignable::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)) #else #define MOODYCAMEL_NOEXCEPT noexcept #define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr) #define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr) #endif #endif #ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED #ifdef MCDBGQ_USE_RELACY #define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED #else // VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445 // g++ <=4.7 doesn't support thread_local either. // Finally, iOS/ARM doesn't have support for it either, and g++/ARM allows it to compile but it's unconfirmed to actually work #if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) // Assume `thread_local` is fully supported in all other C++11 compilers/platforms #define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED // tentatively enabled for now; years ago several users report having problems with it on #endif #endif #endif // VS2012 doesn't support deleted functions. // In this case, we declare the function normally but don't define it. A link error will be generated if the function is called. 
#ifndef MOODYCAMEL_DELETE_FUNCTION
#if defined(_MSC_VER) && _MSC_VER < 1800
#define MOODYCAMEL_DELETE_FUNCTION
#else
#define MOODYCAMEL_DELETE_FUNCTION = delete
#endif
#endif

namespace moodycamel { namespace details {
#ifndef MOODYCAMEL_ALIGNAS
// VS2013 doesn't support alignas or alignof, and align() requires a constant literal
#if defined(_MSC_VER) && _MSC_VER <= 1800
#define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment))
#define MOODYCAMEL_ALIGNOF(obj) __alignof(obj)
#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) typename details::Vs2013Aligned<std::alignment_of<obj>::value, T>::type
    template<int Align, typename T> struct Vs2013Aligned { };  // default, unsupported alignment
    template<typename T> struct Vs2013Aligned<1, T> { typedef __declspec(align(1)) T type; };
    template<typename T> struct Vs2013Aligned<2, T> { typedef __declspec(align(2)) T type; };
    template<typename T> struct Vs2013Aligned<4, T> { typedef __declspec(align(4)) T type; };
    template<typename T> struct Vs2013Aligned<8, T> { typedef __declspec(align(8)) T type; };
    template<typename T> struct Vs2013Aligned<16, T> { typedef __declspec(align(16)) T type; };
    template<typename T> struct Vs2013Aligned<32, T> { typedef __declspec(align(32)) T type; };
    template<typename T> struct Vs2013Aligned<64, T> { typedef __declspec(align(64)) T type; };
    template<typename T> struct Vs2013Aligned<128, T> { typedef __declspec(align(128)) T type; };
    template<typename T> struct Vs2013Aligned<256, T> { typedef __declspec(align(256)) T type; };
#else
    template<typename T> struct identity { typedef T type; };
#define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment)
#define MOODYCAMEL_ALIGNOF(obj) alignof(obj)
#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) alignas(alignof(obj)) typename details::identity<T>::type
#endif
#endif
} }

// TSAN can false report races in lock-free code. To enable TSAN to be used from projects that use this one,
// we can apply per-function compile-time suppression.
// See https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer
#define MOODYCAMEL_NO_TSAN
#if defined(__has_feature)
#if __has_feature(thread_sanitizer)
#undef MOODYCAMEL_NO_TSAN
#define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize("thread")))
#endif // TSAN
#endif // TSAN

// Compiler-specific likely/unlikely hints
namespace moodycamel { namespace details {
#if defined(__GNUC__)
    static inline bool (likely)(bool x) { return __builtin_expect((x), true); }
    static inline bool (unlikely)(bool x) { return __builtin_expect((x), false); }
#else
    static inline bool (likely)(bool x) { return x; }
    static inline bool (unlikely)(bool x) { return x; }
#endif
} }

#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
#include "internal/concurrentqueue_internal_debug.h"
#endif

namespace moodycamel {
namespace details {
    template<typename T>
    struct const_numeric_max {
        static_assert(std::is_integral<T>::value, "const_numeric_max can only be used with integers");
        static const T value = std::numeric_limits<T>::is_signed
            ? (static_cast<T>(1) << (sizeof(T) * CHAR_BIT - 1)) - static_cast<T>(1)
            : static_cast<T>(-1);
    };

#if defined(__GLIBCXX__)
    typedef ::max_align_t std_max_align_t;      // libstdc++ forgot to add it to std:: for a while
#else
    typedef std::max_align_t std_max_align_t;   // Others (e.g. MSVC) insist it can *only* be accessed via std::
#endif

    // Some platforms have incorrectly set max_align_t to a type with <8 bytes alignment even while supporting
    // 8-byte aligned scalar values (*cough* 32-bit iOS). Work around this with our own union. See issue #64.
    typedef union {
        std_max_align_t x;
        long long y;
        void* z;
    } max_align_t;
}

// Default traits for the ConcurrentQueue. To change some of the
// traits without re-implementing all of them, inherit from this
// struct and shadow the declarations you wish to be different;
// since the traits are used as a template type parameter, the
// shadowed declarations will be used where defined, and the defaults
// otherwise.
struct ConcurrentQueueDefaultTraits
{
    // General-purpose size type. std::size_t is strongly recommended.
    typedef std::size_t size_t;

    // The type used for the enqueue and dequeue indices. Must be at least as
    // large as size_t. Should be significantly larger than the number of elements
    // you expect to hold at once, especially if you have a high turnover rate;
    // for example, on 32-bit x86, if you expect to have over a hundred million
    // elements or pump several million elements through your queue in a very
    // short space of time, using a 32-bit type *may* trigger a race condition.
    // A 64-bit int type is recommended in that case, and in practice will
    // prevent a race condition no matter the usage of the queue. Note that
    // whether the queue is lock-free with a 64-int type depends on the whether
    // std::atomic<std::uint64_t> is lock-free, which is platform-specific.
    typedef std::size_t index_t;

    // Internally, all elements are enqueued and dequeued from multi-element
    // blocks; this is the smallest controllable unit. If you expect few elements
    // but many producers, a smaller block size should be favoured. For few producers
    // and/or many elements, a larger block size is preferred. A sane default
    // is provided. Must be a power of 2.
    static const size_t BLOCK_SIZE = 32;

    // For explicit producers (i.e. when using a producer token), the block is
    // checked for being empty by iterating through a list of flags, one per element.
    // For large block sizes, this is too inefficient, and switching to an atomic
    // counter-based approach is faster. The switch is made for block sizes strictly
    // larger than this threshold.
    static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32;

    // How many full blocks can be expected for a single explicit producer? This should
    // reflect that number's maximum for optimal performance. Must be a power of 2.
    static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32;

    // How many full blocks can be expected for a single implicit producer? This should
    // reflect that number's maximum for optimal performance. Must be a power of 2.
    static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32;

    // The initial size of the hash table mapping thread IDs to implicit producers.
    // Note that the hash is resized every time it becomes half full.
    // Must be a power of two, and either 0 or at least 1. If 0, implicit production
    // (using the enqueue methods without an explicit producer token) is disabled.
    static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32;

    // Controls the number of items that an explicit consumer (i.e. one with a token)
    // must consume before it causes all consumers to rotate and move on to the next
    // internal queue.
    static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256;

    // The maximum number of elements (inclusive) that can be enqueued to a sub-queue.
    // Enqueue operations that would cause this limit to be surpassed will fail. Note
    // that this limit is enforced at the block level (for performance reasons), i.e.
    // it's rounded up to the nearest block size.
    static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max<size_t>::value;

    // The number of times to spin before sleeping when waiting on a semaphore.
    // Recommended values are on the order of 1000-10000 unless the number of
    // consumer threads exceeds the number of idle cores (in which case try 0-100).
    // Only affects instances of the BlockingConcurrentQueue.
    static const int MAX_SEMA_SPINS = 10000;

    // Whether to recycle dynamically-allocated blocks into an internal free list or
    // not. If false, only pre-allocated blocks (controlled by the constructor
    // arguments) will be recycled, and all others will be `free`d back to the heap.
    // Note that blocks consumed by explicit producers are only freed on destruction
    // of the queue (not following destruction of the token) regardless of this trait.
    static const bool RECYCLE_ALLOCATED_BLOCKS = false;

#ifndef MCDBGQ_USE_RELACY
    // Memory allocation can be customized if needed.
    // malloc should return nullptr on failure, and handle alignment like std::malloc.
#if defined(malloc) || defined(free)
    // Gah, this is 2015, stop defining macros that break standard code already!
    // Work around malloc/free being special macros:
    static inline void* WORKAROUND_malloc(size_t size) { return malloc(size); }
    static inline void WORKAROUND_free(void* ptr) { return free(ptr); }
    static inline void* (malloc)(size_t size) { return WORKAROUND_malloc(size); }
    static inline void (free)(void* ptr) { return WORKAROUND_free(ptr); }
#else
    static inline void* malloc(size_t size) { return std::malloc(size); }
    static inline void free(void* ptr) { return std::free(ptr); }
#endif
#else
    // Debug versions when running under the Relacy race detector (ignore
    // these in user code)
    static inline void* malloc(size_t size) { return rl::rl_malloc(size, $); }
    static inline void free(void* ptr) { return rl::rl_free(ptr, $); }
#endif
};

// When producing or consuming many elements, the most efficient way is to:
//    1) Use one of the bulk-operation methods of the queue with a token
//    2) Failing that, use the bulk-operation methods without a token
//    3) Failing that, create a token and use that with the single-item methods
//    4) Failing that, use the single-parameter methods of the queue
// Having said that, don't create tokens willy-nilly -- ideally there should be
// a maximum of one token per thread (of each kind).
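// ---------------------------------------------------------------------------
// [Editorial note -- not part of the upstream moodycamel header.]
// A minimal, hedged sketch of the traits mechanism described above: inherit
// ConcurrentQueueDefaultTraits and shadow only the members you want changed.
// `BigBlockTraits`, `traits_sketch`, and the chosen constants are illustrative
// assumptions, not values used by this library or by dispenso.
#if 0
#include <cstdint>
#include "concurrentqueue.h"

struct BigBlockTraits : public moodycamel::ConcurrentQueueDefaultTraits
{
    static const size_t BLOCK_SIZE = 256;  // must stay a power of 2
    typedef std::uint64_t index_t;         // must be at least as wide as size_t
};

void traits_sketch()
{
    moodycamel::ConcurrentQueue<int, BigBlockTraits> q;
    q.enqueue(42);

    int item;
    if (q.try_dequeue(item)) {
        // item is now 42
    }
}
#endif
// ---------------------------------------------------------------------------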
struct ProducerToken; struct ConsumerToken; template class ConcurrentQueue; template class BlockingConcurrentQueue; class ConcurrentQueueTests; namespace details { struct ConcurrentQueueProducerTypelessBase { ConcurrentQueueProducerTypelessBase* next; std::atomic inactive; ProducerToken* token; ConcurrentQueueProducerTypelessBase() : next(nullptr), inactive(false), token(nullptr) { } }; template struct _hash_32_or_64 { static inline std::uint32_t hash(std::uint32_t h) { // MurmurHash3 finalizer -- see https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp // Since the thread ID is already unique, all we really want to do is propagate that // uniqueness evenly across all the bits, so that we can use a subset of the bits while // reducing collisions significantly h ^= h >> 16; h *= 0x85ebca6b; h ^= h >> 13; h *= 0xc2b2ae35; return h ^ (h >> 16); } }; template<> struct _hash_32_or_64<1> { static inline std::uint64_t hash(std::uint64_t h) { h ^= h >> 33; h *= 0xff51afd7ed558ccd; h ^= h >> 33; h *= 0xc4ceb9fe1a85ec53; return h ^ (h >> 33); } }; template struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> { }; static inline size_t hash_thread_id(thread_id_t id) { static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values"); return static_cast(hash_32_or_64::thread_id_hash_t)>::hash( thread_id_converter::prehash(id))); } template static inline bool circular_less_than(T a, T b) { static_assert(std::is_integral::value && !std::numeric_limits::is_signed, "circular_less_than is intended to be used only with unsigned integer types"); return static_cast(a - b) > static_cast(static_cast(1) << (static_cast(sizeof(T) * CHAR_BIT - 1))); // Note: extra parens around rhs of operator<< is MSVC bug: https://developercommunity2.visualstudio.com/t/C4554-triggers-when-both-lhs-and-rhs-is/10034931 // silencing the bug requires #pragma warning(disable: 4554) around the calling code and has no effect when done here. 
} template static inline char* align_for(char* ptr) { const std::size_t alignment = std::alignment_of::value; return ptr + (alignment - (reinterpret_cast(ptr) % alignment)) % alignment; } template static inline T ceil_to_pow_2(T x) { static_assert(std::is_integral::value && !std::numeric_limits::is_signed, "ceil_to_pow_2 is intended to be used only with unsigned integer types"); // Adapted from http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 --x; x |= x >> 1; x |= x >> 2; x |= x >> 4; for (std::size_t i = 1; i < sizeof(T); i <<= 1) { x |= x >> (i << 3); } ++x; return x; } template static inline void swap_relaxed(std::atomic& left, std::atomic& right) { T temp = std::move(left.load(std::memory_order_relaxed)); left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed); right.store(std::move(temp), std::memory_order_relaxed); } template static inline T const& nomove(T const& x) { return x; } template struct nomove_if { template static inline T const& eval(T const& x) { return x; } }; template<> struct nomove_if { template static inline auto eval(U&& x) -> decltype(std::forward(x)) { return std::forward(x); } }; template static inline auto deref_noexcept(It& it) MOODYCAMEL_NOEXCEPT -> decltype(*it) { return *it; } #if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) template struct is_trivially_destructible : std::is_trivially_destructible { }; #else template struct is_trivially_destructible : std::has_trivial_destructor { }; #endif #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED #ifdef MCDBGQ_USE_RELACY typedef RelacyThreadExitListener ThreadExitListener; typedef RelacyThreadExitNotifier ThreadExitNotifier; #else class ThreadExitNotifier; struct ThreadExitListener { typedef void (*callback_t)(void*); callback_t callback; void* userData; ThreadExitListener* next; // reserved for use by the ThreadExitNotifier ThreadExitNotifier* chain; // reserved for use by the ThreadExitNotifier }; class ThreadExitNotifier { public: static void subscribe(ThreadExitListener* listener) { auto& tlsInst = instance(); std::lock_guard guard(mutex()); listener->next = tlsInst.tail; listener->chain = &tlsInst; tlsInst.tail = listener; } static void unsubscribe(ThreadExitListener* listener) { std::lock_guard guard(mutex()); if (!listener->chain) { return; // race with ~ThreadExitNotifier } auto& tlsInst = *listener->chain; listener->chain = nullptr; ThreadExitListener** prev = &tlsInst.tail; for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) { if (ptr == listener) { *prev = ptr->next; break; } prev = &ptr->next; } } private: ThreadExitNotifier() : tail(nullptr) { } ThreadExitNotifier(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; ThreadExitNotifier& operator=(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; ~ThreadExitNotifier() { // This thread is about to exit, let everyone know! assert(this == &instance() && "If this assert fails, you likely have a buggy compiler! 
Change the preprocessor conditions such that MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined."); std::lock_guard guard(mutex()); for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) { ptr->chain = nullptr; ptr->callback(ptr->userData); } } // Thread-local static inline ThreadExitNotifier& instance() { static thread_local ThreadExitNotifier notifier; return notifier; } static inline std::mutex& mutex() { // Must be static because the ThreadExitNotifier could be destroyed while unsubscribe is called static std::mutex mutex; return mutex; } private: ThreadExitListener* tail; }; #endif #endif template struct static_is_lock_free_num { enum { value = 0 }; }; template<> struct static_is_lock_free_num { enum { value = ATOMIC_CHAR_LOCK_FREE }; }; template<> struct static_is_lock_free_num { enum { value = ATOMIC_SHORT_LOCK_FREE }; }; template<> struct static_is_lock_free_num { enum { value = ATOMIC_INT_LOCK_FREE }; }; template<> struct static_is_lock_free_num { enum { value = ATOMIC_LONG_LOCK_FREE }; }; template<> struct static_is_lock_free_num { enum { value = ATOMIC_LLONG_LOCK_FREE }; }; template struct static_is_lock_free : static_is_lock_free_num::type> { }; template<> struct static_is_lock_free { enum { value = ATOMIC_BOOL_LOCK_FREE }; }; template struct static_is_lock_free { enum { value = ATOMIC_POINTER_LOCK_FREE }; }; } struct ProducerToken { template explicit ProducerToken(ConcurrentQueue& queue); template explicit ProducerToken(BlockingConcurrentQueue& queue); ProducerToken(ProducerToken&& other) MOODYCAMEL_NOEXCEPT : producer(other.producer) { other.producer = nullptr; if (producer != nullptr) { producer->token = this; } } inline ProducerToken& operator=(ProducerToken&& other) MOODYCAMEL_NOEXCEPT { swap(other); return *this; } void swap(ProducerToken& other) MOODYCAMEL_NOEXCEPT { std::swap(producer, other.producer); if (producer != nullptr) { producer->token = this; } if (other.producer != nullptr) { other.producer->token = &other; } } // A token is always valid unless: // 1) Memory allocation failed during construction // 2) It was moved via the move constructor // (Note: assignment does a swap, leaving both potentially valid) // 3) The associated queue was destroyed // Note that if valid() returns true, that only indicates // that the token is valid for use with a specific queue, // but not which one; that's up to the user to track. 
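    // [Editorial note -- illustrative, not upstream code.] In practice the caller
    // should check a freshly constructed token before relying on it, e.g.
    //
    //   moodycamel::ProducerToken ptok(q);   // q is some ConcurrentQueue<T>
    //   if (!ptok.valid()) {
    //       // allocation failed; fall back to the tokenless enqueue methods
    //   }
    //
    // valid() is declared immediately below.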
inline bool valid() const { return producer != nullptr; } ~ProducerToken() { if (producer != nullptr) { producer->token = nullptr; producer->inactive.store(true, std::memory_order_release); } } // Disable copying and assignment ProducerToken(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; ProducerToken& operator=(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; private: template friend class ConcurrentQueue; friend class ConcurrentQueueTests; protected: details::ConcurrentQueueProducerTypelessBase* producer; }; struct ConsumerToken { template explicit ConsumerToken(ConcurrentQueue& q); template explicit ConsumerToken(BlockingConcurrentQueue& q); ConsumerToken(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT : initialOffset(other.initialOffset), lastKnownGlobalOffset(other.lastKnownGlobalOffset), itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), currentProducer(other.currentProducer), desiredProducer(other.desiredProducer) { } inline ConsumerToken& operator=(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT { swap(other); return *this; } void swap(ConsumerToken& other) MOODYCAMEL_NOEXCEPT { std::swap(initialOffset, other.initialOffset); std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset); std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent); std::swap(currentProducer, other.currentProducer); std::swap(desiredProducer, other.desiredProducer); } // Disable copying and assignment ConsumerToken(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; ConsumerToken& operator=(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; private: template friend class ConcurrentQueue; friend class ConcurrentQueueTests; private: // but shared with ConcurrentQueue std::uint32_t initialOffset; std::uint32_t lastKnownGlobalOffset; std::uint32_t itemsConsumedFromCurrent; details::ConcurrentQueueProducerTypelessBase* currentProducer; details::ConcurrentQueueProducerTypelessBase* desiredProducer; }; // Need to forward-declare this swap because it's in a namespace. // See http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces template inline void swap(typename ConcurrentQueue::ImplicitProducerKVP& a, typename ConcurrentQueue::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT; template class ConcurrentQueue { public: typedef ::moodycamel::ProducerToken producer_token_t; typedef ::moodycamel::ConsumerToken consumer_token_t; typedef typename Traits::index_t index_t; typedef typename Traits::size_t size_t; static const size_t BLOCK_SIZE = static_cast(Traits::BLOCK_SIZE); static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD); static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast(Traits::EXPLICIT_INITIAL_INDEX_SIZE); static const size_t IMPLICIT_INITIAL_INDEX_SIZE = static_cast(Traits::IMPLICIT_INITIAL_INDEX_SIZE); static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = static_cast(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE); static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = static_cast(Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE); #ifdef _MSC_VER #pragma warning(push) #pragma warning(disable: 4307) // + integral constant overflow (that's what the ternary expression is for!) #pragma warning(disable: 4309) // static_cast: Truncation of constant value #endif static const size_t MAX_SUBQUEUE_SIZE = (details::const_numeric_max::value - static_cast(Traits::MAX_SUBQUEUE_SIZE) < BLOCK_SIZE) ? 
details::const_numeric_max::value : ((static_cast(Traits::MAX_SUBQUEUE_SIZE) + (BLOCK_SIZE - 1)) / BLOCK_SIZE * BLOCK_SIZE); #ifdef _MSC_VER #pragma warning(pop) #endif static_assert(!std::numeric_limits::is_signed && std::is_integral::value, "Traits::size_t must be an unsigned integral type"); static_assert(!std::numeric_limits::is_signed && std::is_integral::value, "Traits::index_t must be an unsigned integral type"); static_assert(sizeof(index_t) >= sizeof(size_t), "Traits::index_t must be at least as wide as Traits::size_t"); static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)), "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)"); static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) && !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD & (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)), "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a power of 2 (and greater than 1)"); static_assert((EXPLICIT_INITIAL_INDEX_SIZE > 1) && !(EXPLICIT_INITIAL_INDEX_SIZE & (EXPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)"); static_assert((IMPLICIT_INITIAL_INDEX_SIZE > 1) && !(IMPLICIT_INITIAL_INDEX_SIZE & (IMPLICIT_INITIAL_INDEX_SIZE - 1)), "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)"); static_assert((INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) || !(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE & (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - 1)), "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2"); static_assert(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0 || INITIAL_IMPLICIT_PRODUCER_HASH_SIZE >= 1, "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least 1 (or 0 to disable implicit enqueueing)"); public: // Creates a queue with at least `capacity` element slots; note that the // actual number of elements that can be inserted without additional memory // allocation depends on the number of producers and the block size (e.g. if // the block size is equal to `capacity`, only a single block will be allocated // up-front, which means only a single producer will be able to enqueue elements // without an extra allocation -- blocks aren't shared between producers). // This method is not thread safe -- it is up to the user to ensure that the // queue is fully constructed before it starts being used by other threads (this // includes making the memory effects of construction visible, possibly with a // memory barrier). explicit ConcurrentQueue(size_t capacity = 32 * BLOCK_SIZE) : producerListTail(nullptr), producerCount(0), initialBlockPoolIndex(0), nextExplicitConsumerId(0), globalExplicitConsumerOffset(0) { implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); populate_initial_implicit_producer_hash(); populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1)); #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG // Track all the producers using a fully-resolved typed list for // each kind; this makes it possible to debug them starting from // the root queue object (otherwise wacky casts are needed that // don't compile in the debugger's expression evaluator). explicitProducers.store(nullptr, std::memory_order_relaxed); implicitProducers.store(nullptr, std::memory_order_relaxed); #endif } // Computes the correct amount of pre-allocated blocks for you based // on the minimum number of elements you want available at any given // time, and the maximum concurrent number of each type of producer. 
ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) : producerListTail(nullptr), producerCount(0), initialBlockPoolIndex(0), nextExplicitConsumerId(0), globalExplicitConsumerOffset(0) { implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); populate_initial_implicit_producer_hash(); size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + 2 * (maxExplicitProducers + maxImplicitProducers); populate_initial_block_list(blocks); #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG explicitProducers.store(nullptr, std::memory_order_relaxed); implicitProducers.store(nullptr, std::memory_order_relaxed); #endif } // Note: The queue should not be accessed concurrently while it's // being deleted. It's up to the user to synchronize this. // This method is not thread safe. ~ConcurrentQueue() { // Destroy producers auto ptr = producerListTail.load(std::memory_order_relaxed); while (ptr != nullptr) { auto next = ptr->next_prod(); if (ptr->token != nullptr) { ptr->token->producer = nullptr; } destroy(ptr); ptr = next; } // Destroy implicit producer hash tables MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) { auto hash = implicitProducerHash.load(std::memory_order_relaxed); while (hash != nullptr) { auto prev = hash->prev; if (prev != nullptr) { // The last hash is part of this object and was not allocated dynamically for (size_t i = 0; i != hash->capacity; ++i) { hash->entries[i].~ImplicitProducerKVP(); } hash->~ImplicitProducerHash(); (Traits::free)(hash); } hash = prev; } } // Destroy global free list auto block = freeList.head_unsafe(); while (block != nullptr) { auto next = block->freeListNext.load(std::memory_order_relaxed); if (block->dynamicallyAllocated) { destroy(block); } block = next; } // Destroy initial free list destroy_array(initialBlockPool, initialBlockPoolSize); } // Disable copying and copy assignment ConcurrentQueue(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; ConcurrentQueue& operator=(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; // Moving is supported, but note that it is *not* a thread-safe operation. // Nobody can use the queue while it's being moved, and the memory effects // of that move must be propagated to other threads before they can use it. // Note: When a queue is moved, its tokens are still valid but can only be // used with the destination queue (i.e. semantically they are moved along // with the queue itself). 
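    // [Editorial note -- illustrative sketch, not upstream code.] The constructors
    // above and the move support below might be used as follows; `Job` and the
    // chosen sizes are assumptions made only for this example.
    //
    //   // At least 4096 element slots pre-allocated (shared across producers):
    //   moodycamel::ConcurrentQueue<Job> q1(4096);
    //
    //   // Pre-sized for 4096 elements, at most 8 explicit (token) producers and
    //   // 4 implicit (tokenless) producers:
    //   moodycamel::ConcurrentQueue<Job> q2(4096, 8, 4);
    //
    //   // Moving is supported but not thread-safe; q2's tokens now belong to q3:
    //   moodycamel::ConcurrentQueue<Job> q3(std::move(q2));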
ConcurrentQueue(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT : producerListTail(other.producerListTail.load(std::memory_order_relaxed)), producerCount(other.producerCount.load(std::memory_order_relaxed)), initialBlockPoolIndex(other.initialBlockPoolIndex.load(std::memory_order_relaxed)), initialBlockPool(other.initialBlockPool), initialBlockPoolSize(other.initialBlockPoolSize), freeList(std::move(other.freeList)), nextExplicitConsumerId(other.nextExplicitConsumerId.load(std::memory_order_relaxed)), globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { // Move the other one into this, and leave the other one as an empty queue implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); populate_initial_implicit_producer_hash(); swap_implicit_producer_hashes(other); other.producerListTail.store(nullptr, std::memory_order_relaxed); other.producerCount.store(0, std::memory_order_relaxed); other.nextExplicitConsumerId.store(0, std::memory_order_relaxed); other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed); #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG explicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); other.explicitProducers.store(nullptr, std::memory_order_relaxed); implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); other.implicitProducers.store(nullptr, std::memory_order_relaxed); #endif other.initialBlockPoolIndex.store(0, std::memory_order_relaxed); other.initialBlockPoolSize = 0; other.initialBlockPool = nullptr; reown_producers(); } inline ConcurrentQueue& operator=(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT { return swap_internal(other); } // Swaps this queue's state with the other's. Not thread-safe. // Swapping two queues does not invalidate their tokens, however // the tokens that were created for one queue must be used with // only the swapped queue (i.e. the tokens are tied to the // queue's movable state, not the object itself). inline void swap(ConcurrentQueue& other) MOODYCAMEL_NOEXCEPT { swap_internal(other); } private: ConcurrentQueue& swap_internal(ConcurrentQueue& other) { if (this == &other) { return *this; } details::swap_relaxed(producerListTail, other.producerListTail); details::swap_relaxed(producerCount, other.producerCount); details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex); std::swap(initialBlockPool, other.initialBlockPool); std::swap(initialBlockPoolSize, other.initialBlockPoolSize); freeList.swap(other.freeList); details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId); details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset); swap_implicit_producer_hashes(other); reown_producers(); other.reown_producers(); #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG details::swap_relaxed(explicitProducers, other.explicitProducers); details::swap_relaxed(implicitProducers, other.implicitProducers); #endif return *this; } public: // Enqueues a single item (by copying it). // Allocates memory if required. Only fails if memory allocation fails (or implicit // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). // Thread-safe. inline bool enqueue(T const& item) { MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; else return inner_enqueue(item); } // Enqueues a single item (by moving it, if possible). 
// Allocates memory if required. Only fails if memory allocation fails (or implicit // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). // Thread-safe. inline bool enqueue(T&& item) { MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; else return inner_enqueue(std::move(item)); } // Enqueues a single item (by copying it) using an explicit producer token. // Allocates memory if required. Only fails if memory allocation fails (or // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). // Thread-safe. inline bool enqueue(producer_token_t const& token, T const& item) { return inner_enqueue(token, item); } // Enqueues a single item (by moving it, if possible) using an explicit producer token. // Allocates memory if required. Only fails if memory allocation fails (or // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). // Thread-safe. inline bool enqueue(producer_token_t const& token, T&& item) { return inner_enqueue(token, std::move(item)); } // Enqueues several items. // Allocates memory if required. Only fails if memory allocation fails (or // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). // Note: Use std::make_move_iterator if the elements should be moved instead of copied. // Thread-safe. template bool enqueue_bulk(It itemFirst, size_t count) { MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; else return inner_enqueue_bulk(itemFirst, count); } // Enqueues several items using an explicit producer token. // Allocates memory if required. Only fails if memory allocation fails // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). // Note: Use std::make_move_iterator if the elements should be moved // instead of copied. // Thread-safe. template bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) { return inner_enqueue_bulk(token, itemFirst, count); } // Enqueues a single item (by copying it). // Does not allocate memory. Fails if not enough room to enqueue (or implicit // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE // is 0). // Thread-safe. inline bool try_enqueue(T const& item) { MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; else return inner_enqueue(item); } // Enqueues a single item (by moving it, if possible). // Does not allocate memory (except for one-time implicit producer). // Fails if not enough room to enqueue (or implicit production is // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). // Thread-safe. inline bool try_enqueue(T&& item) { MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; else return inner_enqueue(std::move(item)); } // Enqueues a single item (by copying it) using an explicit producer token. // Does not allocate memory. Fails if not enough room to enqueue. // Thread-safe. inline bool try_enqueue(producer_token_t const& token, T const& item) { return inner_enqueue(token, item); } // Enqueues a single item (by moving it, if possible) using an explicit producer token. // Does not allocate memory. Fails if not enough room to enqueue. // Thread-safe. inline bool try_enqueue(producer_token_t const& token, T&& item) { return inner_enqueue(token, std::move(item)); } // Enqueues several items. 
// Does not allocate memory (except for one-time implicit producer). // Fails if not enough room to enqueue (or implicit production is // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). // Note: Use std::make_move_iterator if the elements should be moved // instead of copied. // Thread-safe. template bool try_enqueue_bulk(It itemFirst, size_t count) { MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; else return inner_enqueue_bulk(itemFirst, count); } // Enqueues several items using an explicit producer token. // Does not allocate memory. Fails if not enough room to enqueue. // Note: Use std::make_move_iterator if the elements should be moved // instead of copied. // Thread-safe. template bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) { return inner_enqueue_bulk(token, itemFirst, count); } // Attempts to dequeue from the queue. // Returns false if all producer streams appeared empty at the time they // were checked (so, the queue is likely but not guaranteed to be empty). // Never allocates. Thread-safe. template bool try_dequeue(U& item) { // Instead of simply trying each producer in turn (which could cause needless contention on the first // producer), we score them heuristically. size_t nonEmptyCount = 0; ProducerBase* best = nullptr; size_t bestSize = 0; for (auto ptr = producerListTail.load(std::memory_order_acquire); nonEmptyCount < 3 && ptr != nullptr; ptr = ptr->next_prod()) { auto size = ptr->size_approx(); if (size > 0) { if (size > bestSize) { bestSize = size; best = ptr; } ++nonEmptyCount; } } // If there was at least one non-empty queue but it appears empty at the time // we try to dequeue from it, we need to make sure every queue's been tried if (nonEmptyCount > 0) { if ((details::likely)(best->dequeue(item))) { return true; } for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { if (ptr != best && ptr->dequeue(item)) { return true; } } } return false; } // Attempts to dequeue from the queue. // Returns false if all producer streams appeared empty at the time they // were checked (so, the queue is likely but not guaranteed to be empty). // This differs from the try_dequeue(item) method in that this one does // not attempt to reduce contention by interleaving the order that producer // streams are dequeued from. So, using this method can reduce overall throughput // under contention, but will give more predictable results in single-threaded // consumer scenarios. This is mostly only useful for internal unit tests. // Never allocates. Thread-safe. template bool try_dequeue_non_interleaved(U& item) { for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { if (ptr->dequeue(item)) { return true; } } return false; } // Attempts to dequeue from the queue using an explicit consumer token. // Returns false if all producer streams appeared empty at the time they // were checked (so, the queue is likely but not guaranteed to be empty). // Never allocates. Thread-safe. 
template bool try_dequeue(consumer_token_t& token, U& item) { // The idea is roughly as follows: // Every 256 items from one producer, make everyone rotate (increase the global offset) -> this means the highest efficiency consumer dictates the rotation speed of everyone else, more or less // If you see that the global offset has changed, you must reset your consumption counter and move to your designated place // If there's no items where you're supposed to be, keep moving until you find a producer with some items // If the global offset has not changed but you've run out of items to consume, move over from your current position until you find an producer with something in it if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { if (!update_current_producer_after_rotation(token)) { return false; } } // If there was at least one non-empty queue but it appears empty at the time // we try to dequeue from it, we need to make sure every queue's been tried if (static_cast(token.currentProducer)->dequeue(item)) { if (++token.itemsConsumedFromCurrent == EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); } return true; } auto tail = producerListTail.load(std::memory_order_acquire); auto ptr = static_cast(token.currentProducer)->next_prod(); if (ptr == nullptr) { ptr = tail; } while (ptr != static_cast(token.currentProducer)) { if (ptr->dequeue(item)) { token.currentProducer = ptr; token.itemsConsumedFromCurrent = 1; return true; } ptr = ptr->next_prod(); if (ptr == nullptr) { ptr = tail; } } return false; } // Attempts to dequeue several elements from the queue. // Returns the number of items actually dequeued. // Returns 0 if all producer streams appeared empty at the time they // were checked (so, the queue is likely but not guaranteed to be empty). // Never allocates. Thread-safe. template size_t try_dequeue_bulk(It itemFirst, size_t max) { size_t count = 0; for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { count += ptr->dequeue_bulk(itemFirst, max - count); if (count == max) { break; } } return count; } // Attempts to dequeue several elements from the queue using an explicit consumer token. // Returns the number of items actually dequeued. // Returns 0 if all producer streams appeared empty at the time they // were checked (so, the queue is likely but not guaranteed to be empty). // Never allocates. Thread-safe. 
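    // [Editorial note -- hedged usage sketch, not part of the upstream header.]
    // It ties together the earlier recommendation: one producer token and one
    // consumer token per thread, combined with the bulk methods. The function
    // name `bulk_sketch` and the batch sizes are assumptions for illustration.
#if 0
#include <cstddef>
#include "concurrentqueue.h"

void bulk_sketch(moodycamel::ConcurrentQueue<int>& q)
{
    // Producer side: per-thread token plus bulk enqueue.
    moodycamel::ProducerToken ptok(q);
    int items[64];
    for (int i = 0; i != 64; ++i) {
        items[i] = i;
    }
    // Copies the items; wrap the iterator with std::make_move_iterator to move instead.
    q.enqueue_bulk(ptok, items, 64);

    // Consumer side: per-thread token plus bulk dequeue.
    moodycamel::ConsumerToken ctok(q);
    int out[16];
    std::size_t n;
    while ((n = q.try_dequeue_bulk(ctok, out, 16)) != 0) {
        // process out[0] .. out[n - 1]
    }
}
#endif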
template size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) { if (token.desiredProducer == nullptr || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { if (!update_current_producer_after_rotation(token)) { return 0; } } size_t count = static_cast(token.currentProducer)->dequeue_bulk(itemFirst, max); if (count == max) { if ((token.itemsConsumedFromCurrent += static_cast(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); } return max; } token.itemsConsumedFromCurrent += static_cast(count); max -= count; auto tail = producerListTail.load(std::memory_order_acquire); auto ptr = static_cast(token.currentProducer)->next_prod(); if (ptr == nullptr) { ptr = tail; } while (ptr != static_cast(token.currentProducer)) { auto dequeued = ptr->dequeue_bulk(itemFirst, max); count += dequeued; if (dequeued != 0) { token.currentProducer = ptr; token.itemsConsumedFromCurrent = static_cast(dequeued); } if (dequeued == max) { break; } max -= dequeued; ptr = ptr->next_prod(); if (ptr == nullptr) { ptr = tail; } } return count; } // Attempts to dequeue from a specific producer's inner queue. // If you happen to know which producer you want to dequeue from, this // is significantly faster than using the general-case try_dequeue methods. // Returns false if the producer's queue appeared empty at the time it // was checked (so, the queue is likely but not guaranteed to be empty). // Never allocates. Thread-safe. template inline bool try_dequeue_from_producer(producer_token_t const& producer, U& item) { return static_cast(producer.producer)->dequeue(item); } // Attempts to dequeue several elements from a specific producer's inner queue. // Returns the number of items actually dequeued. // If you happen to know which producer you want to dequeue from, this // is significantly faster than using the general-case try_dequeue methods. // Returns 0 if the producer's queue appeared empty at the time it // was checked (so, the queue is likely but not guaranteed to be empty). // Never allocates. Thread-safe. template inline size_t try_dequeue_bulk_from_producer(producer_token_t const& producer, It itemFirst, size_t max) { return static_cast(producer.producer)->dequeue_bulk(itemFirst, max); } // Returns an estimate of the total number of elements currently in the queue. This // estimate is only accurate if the queue has completely stabilized before it is called // (i.e. all enqueue and dequeue operations have completed and their memory effects are // visible on the calling thread, and no further operations start while this method is // being called). // Thread-safe. size_t size_approx() const { size_t size = 0; for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { size += ptr->size_approx(); } return size; } // Returns true if the underlying atomic variables used by // the queue are lock-free (they should be on most platforms). // Thread-safe. 
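    // [Editorial note -- illustrative only, not upstream code.] Both introspection
    // helpers are best-effort queries:
    //
    //   moodycamel::ConcurrentQueue<int> q;
    //   std::size_t roughly = q.size_approx();  // estimate; see the caveat above
    //
    //   static_assert(moodycamel::ConcurrentQueue<int>::is_lock_free(),
    //                 "expected lock-free atomics on this platform");
    //
    // The static_assert encodes an assumption about the target platform;
    // is_lock_free() (defined below) may legitimately return false elsewhere.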
static constexpr bool is_lock_free() { return details::static_is_lock_free::value == 2 && details::static_is_lock_free::value == 2 && details::static_is_lock_free::value == 2 && details::static_is_lock_free::value == 2 && details::static_is_lock_free::value == 2 && details::static_is_lock_free::thread_id_numeric_size_t>::value == 2; } private: friend struct ProducerToken; friend struct ConsumerToken; struct ExplicitProducer; friend struct ExplicitProducer; struct ImplicitProducer; friend struct ImplicitProducer; friend class ConcurrentQueueTests; enum AllocationMode { CanAlloc, CannotAlloc }; /////////////////////////////// // Queue methods /////////////////////////////// template inline bool inner_enqueue(producer_token_t const& token, U&& element) { return static_cast(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue(std::forward(element)); } template inline bool inner_enqueue(U&& element) { auto producer = get_or_add_implicit_producer(); return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue(std::forward(element)); } template inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) { return static_cast(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk(itemFirst, count); } template inline bool inner_enqueue_bulk(It itemFirst, size_t count) { auto producer = get_or_add_implicit_producer(); return producer == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk(itemFirst, count); } inline bool update_current_producer_after_rotation(consumer_token_t& token) { // Ah, there's been a rotation, figure out where we should be! auto tail = producerListTail.load(std::memory_order_acquire); if (token.desiredProducer == nullptr && tail == nullptr) { return false; } auto prodCount = producerCount.load(std::memory_order_relaxed); auto globalOffset = globalExplicitConsumerOffset.load(std::memory_order_relaxed); if ((details::unlikely)(token.desiredProducer == nullptr)) { // Aha, first time we're dequeueing anything. // Figure out our local position // Note: offset is from start, not end, but we're traversing from end -- subtract from count first std::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount); token.desiredProducer = tail; for (std::uint32_t i = 0; i != offset; ++i) { token.desiredProducer = static_cast(token.desiredProducer)->next_prod(); if (token.desiredProducer == nullptr) { token.desiredProducer = tail; } } } std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset; if (delta >= prodCount) { delta = delta % prodCount; } for (std::uint32_t i = 0; i != delta; ++i) { token.desiredProducer = static_cast(token.desiredProducer)->next_prod(); if (token.desiredProducer == nullptr) { token.desiredProducer = tail; } } token.lastKnownGlobalOffset = globalOffset; token.currentProducer = token.desiredProducer; token.itemsConsumedFromCurrent = 0; return true; } /////////////////////////// // Free list /////////////////////////// template struct FreeListNode { FreeListNode() : freeListRefs(0), freeListNext(nullptr) { } std::atomic freeListRefs; std::atomic freeListNext; }; // A simple CAS-based lock-free free list. Not the fastest thing in the world under heavy contention, but // simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly // speedy under low contention. 
template // N must inherit FreeListNode or have the same fields (and initialization of them) struct FreeList { FreeList() : freeListHead(nullptr) { } FreeList(FreeList&& other) : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) { other.freeListHead.store(nullptr, std::memory_order_relaxed); } void swap(FreeList& other) { details::swap_relaxed(freeListHead, other.freeListHead); } FreeList(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; FreeList& operator=(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; inline void add(N* node) { #ifdef MCDBGQ_NOLOCKFREE_FREELIST debug::DebugLock lock(mutex); #endif // We know that the should-be-on-freelist bit is 0 at this point, so it's safe to // set it using a fetch_add if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel) == 0) { // Oh look! We were the last ones referencing this node, and we know // we want to add it to the free list, so let's do it! add_knowing_refcount_is_zero(node); } } inline N* try_get() { #ifdef MCDBGQ_NOLOCKFREE_FREELIST debug::DebugLock lock(mutex); #endif auto head = freeListHead.load(std::memory_order_acquire); while (head != nullptr) { auto prevHead = head; auto refs = head->freeListRefs.load(std::memory_order_relaxed); if ((refs & REFS_MASK) == 0 || !head->freeListRefs.compare_exchange_strong(refs, refs + 1, std::memory_order_acquire, std::memory_order_relaxed)) { head = freeListHead.load(std::memory_order_acquire); continue; } // Good, reference count has been incremented (it wasn't at zero), which means we can read the // next and not worry about it changing between now and the time we do the CAS auto next = head->freeListNext.load(std::memory_order_relaxed); if (freeListHead.compare_exchange_strong(head, next, std::memory_order_acquire, std::memory_order_relaxed)) { // Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no // matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on). assert((head->freeListRefs.load(std::memory_order_relaxed) & SHOULD_BE_ON_FREELIST) == 0); // Decrease refcount twice, once for our ref, and once for the list's ref head->freeListRefs.fetch_sub(2, std::memory_order_release); return head; } // OK, the head must have changed on us, but we still need to decrease the refcount we increased. // Note that we don't need to release any memory effects, but we do need to ensure that the reference // count decrement happens-after the CAS on the head. refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel); if (refs == SHOULD_BE_ON_FREELIST + 1) { add_knowing_refcount_is_zero(prevHead); } } return nullptr; } // Useful for traversing the list when there's no contention (e.g. to destroy remaining nodes) N* head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); } private: inline void add_knowing_refcount_is_zero(N* node) { // Since the refcount is zero, and nobody can increase it once it's zero (except us, and we run // only one copy of this method per node at a time, i.e. 
the single thread case), then we know // we can safely change the next pointer of the node; however, once the refcount is back above // zero, then other threads could increase it (happens under heavy contention, when the refcount // goes to zero in between a load and a refcount increment of a node in try_get, then back up to // something non-zero, then the refcount increment is done by the other thread) -- so, if the CAS // to add the node to the actual list fails, decrease the refcount and leave the add operation to // the next thread who puts the refcount back at zero (which could be us, hence the loop). auto head = freeListHead.load(std::memory_order_relaxed); while (true) { node->freeListNext.store(head, std::memory_order_relaxed); node->freeListRefs.store(1, std::memory_order_release); if (!freeListHead.compare_exchange_strong(head, node, std::memory_order_release, std::memory_order_relaxed)) { // Hmm, the add failed, but we can only try again when the refcount goes back to zero if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1, std::memory_order_release) == 1) { continue; } } return; } } private: // Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention) std::atomic freeListHead; static const std::uint32_t REFS_MASK = 0x7FFFFFFF; static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000; #ifdef MCDBGQ_NOLOCKFREE_FREELIST debug::DebugMutex mutex; #endif }; /////////////////////////// // Block /////////////////////////// enum InnerQueueContext { implicit_context = 0, explicit_context = 1 }; struct Block { Block() : next(nullptr), elementsCompletelyDequeued(0), freeListRefs(0), freeListNext(nullptr), dynamicallyAllocated(true) { #ifdef MCDBGQ_TRACKMEM owner = nullptr; #endif } template inline bool is_empty() const { MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { // Check flags for (size_t i = 0; i < BLOCK_SIZE; ++i) { if (!emptyFlags[i].load(std::memory_order_relaxed)) { return false; } } // Aha, empty; make sure we have all other memory effects that happened before the empty flags were set std::atomic_thread_fence(std::memory_order_acquire); return true; } else { // Check counter if (elementsCompletelyDequeued.load(std::memory_order_relaxed) == BLOCK_SIZE) { std::atomic_thread_fence(std::memory_order_acquire); return true; } assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <= BLOCK_SIZE); return false; } } // Returns true if the block is now empty (does not apply in explicit context) template inline bool set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i) { MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { // Set flag assert(!emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].load(std::memory_order_relaxed)); emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].store(true, std::memory_order_release); return false; } else { // Increment counter auto prevVal = elementsCompletelyDequeued.fetch_add(1, std::memory_order_release); assert(prevVal < BLOCK_SIZE); return prevVal == BLOCK_SIZE - 1; } } // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0). // Returns true if the block is now empty (does not apply in explicit context). 
template inline bool set_many_empty(MOODYCAMEL_MAYBE_UNUSED index_t i, size_t count) { MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { // Set flags std::atomic_thread_fence(std::memory_order_release); i = BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1)) - count + 1; for (size_t j = 0; j != count; ++j) { assert(!emptyFlags[i + j].load(std::memory_order_relaxed)); emptyFlags[i + j].store(true, std::memory_order_relaxed); } return false; } else { // Increment counter auto prevVal = elementsCompletelyDequeued.fetch_add(count, std::memory_order_release); assert(prevVal + count <= BLOCK_SIZE); return prevVal + count == BLOCK_SIZE; } } template inline void set_all_empty() { MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { // Set all flags for (size_t i = 0; i != BLOCK_SIZE; ++i) { emptyFlags[i].store(true, std::memory_order_relaxed); } } else { // Reset counter elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed); } } template inline void reset_empty() { MOODYCAMEL_CONSTEXPR_IF (context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { // Reset flags for (size_t i = 0; i != BLOCK_SIZE; ++i) { emptyFlags[i].store(false, std::memory_order_relaxed); } } else { // Reset counter elementsCompletelyDequeued.store(0, std::memory_order_relaxed); } } inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT { return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); } inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT { return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); } private: static_assert(std::alignment_of::value <= sizeof(T), "The queue does not support types with an alignment greater than their size at this time"); MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T) * BLOCK_SIZE], T) elements; public: Block* next; std::atomic elementsCompletelyDequeued; std::atomic emptyFlags[BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? 
BLOCK_SIZE : 1]; public: std::atomic freeListRefs; std::atomic freeListNext; bool dynamicallyAllocated; // Perhaps a better name for this would be 'isNotPartOfInitialBlockPool' #ifdef MCDBGQ_TRACKMEM void* owner; #endif }; static_assert(std::alignment_of::value >= std::alignment_of::value, "Internal error: Blocks must be at least as aligned as the type they are wrapping"); #ifdef MCDBGQ_TRACKMEM public: struct MemStats; private: #endif /////////////////////////// // Producer base /////////////////////////// struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase { ProducerBase(ConcurrentQueue* parent_, bool isExplicit_) : tailIndex(0), headIndex(0), dequeueOptimisticCount(0), dequeueOvercommit(0), tailBlock(nullptr), isExplicit(isExplicit_), parent(parent_) { } virtual ~ProducerBase() { } template inline bool dequeue(U& element) { if (isExplicit) { return static_cast(this)->dequeue(element); } else { return static_cast(this)->dequeue(element); } } template inline size_t dequeue_bulk(It& itemFirst, size_t max) { if (isExplicit) { return static_cast(this)->dequeue_bulk(itemFirst, max); } else { return static_cast(this)->dequeue_bulk(itemFirst, max); } } inline ProducerBase* next_prod() const { return static_cast(next); } inline size_t size_approx() const { auto tail = tailIndex.load(std::memory_order_relaxed); auto head = headIndex.load(std::memory_order_relaxed); return details::circular_less_than(head, tail) ? static_cast(tail - head) : 0; } inline index_t getTail() const { return tailIndex.load(std::memory_order_relaxed); } protected: std::atomic tailIndex; // Where to enqueue to next std::atomic headIndex; // Where to dequeue from next std::atomic dequeueOptimisticCount; std::atomic dequeueOvercommit; Block* tailBlock; public: bool isExplicit; ConcurrentQueue* parent; protected: #ifdef MCDBGQ_TRACKMEM friend struct MemStats; #endif }; /////////////////////////// // Explicit queue /////////////////////////// struct ExplicitProducer : public ProducerBase { explicit ExplicitProducer(ConcurrentQueue* parent_) : ProducerBase(parent_, true), blockIndex(nullptr), pr_blockIndexSlotsUsed(0), pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1), pr_blockIndexFront(0), pr_blockIndexEntries(nullptr), pr_blockIndexRaw(nullptr) { size_t poolBasedIndexSize = details::ceil_to_pow_2(parent_->initialBlockPoolSize) >> 1; if (poolBasedIndexSize > pr_blockIndexSize) { pr_blockIndexSize = poolBasedIndexSize; } new_block_index(0); // This creates an index with double the number of current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE } ~ExplicitProducer() override { // Destruct any elements not yet dequeued. // Since we're in the destructor, we can assume all elements // are either completely dequeued or completely not (no halfways). 
if (this->tailBlock != nullptr) { // Note this means there must be a block index too // First find the block that's partially dequeued, if any Block* halfDequeuedBlock = nullptr; if ((this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) != 0) { // The head's not on a block boundary, meaning a block somewhere is partially dequeued // (or the head block is the tail block and was fully dequeued, but the head/tail are still not on a boundary) size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & (pr_blockIndexSize - 1); while (details::circular_less_than(pr_blockIndexEntries[i].base + BLOCK_SIZE, this->headIndex.load(std::memory_order_relaxed))) { i = (i + 1) & (pr_blockIndexSize - 1); } assert(details::circular_less_than(pr_blockIndexEntries[i].base, this->headIndex.load(std::memory_order_relaxed))); halfDequeuedBlock = pr_blockIndexEntries[i].block; } // Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration) auto block = this->tailBlock; do { block = block->next; if (block->ConcurrentQueue::Block::template is_empty()) { continue; } size_t i = 0; // Offset into block if (block == halfDequeuedBlock) { i = static_cast(this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)); } // Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index auto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) == 0 ? BLOCK_SIZE : static_cast(this->tailIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)); while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) { (*block)[i++]->~T(); } } while (block != this->tailBlock); } // Destroy all blocks that we own if (this->tailBlock != nullptr) { auto block = this->tailBlock; do { auto nextBlock = block->next; this->parent->add_block_to_free_list(block); block = nextBlock; } while (block != this->tailBlock); } // Destroy the block indices auto header = static_cast(pr_blockIndexRaw); while (header != nullptr) { auto prev = static_cast(header->prev); header->~BlockIndexHeader(); (Traits::free)(header); header = prev; } } template inline bool enqueue(U&& element) { index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); index_t newTailIndex = 1 + currentTailIndex; if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { // We reached the end of a block, start a new one auto startBlock = this->tailBlock; auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) { // We can re-use the block ahead of us, it's empty! this->tailBlock = this->tailBlock->next; this->tailBlock->ConcurrentQueue::Block::template reset_empty(); // We'll put the block on the block index (guaranteed to be room since we're conceptually removing the // last block from it first -- except instead of removing then adding, we can just overwrite). // Note that there must be a valid block index here, since even if allocation failed in the ctor, // it would have been re-attempted when adding the first block to the queue; since there is such // a block, a block index must have been successfully allocated. } else { // Whatever head value we see here is >= the last value we saw here (relatively), // and <= its current value. Since we have the most recent tail, the head must be // <= to it. 
auto head = this->headIndex.load(std::memory_order_relaxed); assert(!details::circular_less_than(currentTailIndex, head)); if (!details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { // We can't enqueue in another block because there's not enough leeway -- the // tail could surpass the head by the time the block fills up! (Or we'll exceed // the size limit, if the second part of the condition was true.) return false; } // We're going to need a new block; check that the block index has room if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize) { // Hmm, the circular block index is already full -- we'll need // to allocate a new index. Note pr_blockIndexRaw can only be nullptr if // the initial allocation failed in the constructor. MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { return false; } else if (!new_block_index(pr_blockIndexSlotsUsed)) { return false; } } // Insert a new block in the circular linked list auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); if (newBlock == nullptr) { return false; } #ifdef MCDBGQ_TRACKMEM newBlock->owner = this; #endif newBlock->ConcurrentQueue::Block::template reset_empty(); if (this->tailBlock == nullptr) { newBlock->next = newBlock; } else { newBlock->next = this->tailBlock->next; this->tailBlock->next = newBlock; } this->tailBlock = newBlock; ++pr_blockIndexSlotsUsed; } MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { // The constructor may throw. We want the element not to appear in the queue in // that case (without corrupting the queue): MOODYCAMEL_TRY { new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); } MOODYCAMEL_CATCH (...) { // Revert change to the current block, but leave the new block available // for next time pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; this->tailBlock = startBlock == nullptr ? this->tailBlock : startBlock; MOODYCAMEL_RETHROW; } } else { (void)startBlock; (void)originalBlockIndexSlotsUsed; } // Add block to block index auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; entry.base = currentTailIndex; entry.block = this->tailBlock; blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release); pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } } // Enqueue new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } template bool dequeue(U& element) { auto tail = this->tailIndex.load(std::memory_order_relaxed); auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); if (details::circular_less_than(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { // Might be something to dequeue, let's give it a try // Note that this if is purely for performance purposes in the common case when the queue is // empty and the values are eventually consistent -- we may enter here spuriously. 
// Note that whatever the values of overcommit and tail are, they are not going to change (unless we // change them) and must be the same value at this point (inside the if) as when the if condition was // evaluated. // We insert an acquire fence here to synchronize-with the release upon incrementing dequeueOvercommit below. // This ensures that whatever the value we got loaded into overcommit, the load of dequeueOptisticCount in // the fetch_add below will result in a value at least as recent as that (and therefore at least as large). // Note that I believe a compiler (signal) fence here would be sufficient due to the nature of fetch_add (all // read-modify-write operations are guaranteed to work on the latest value in the modification order), but // unfortunately that can't be shown to be correct using only the C++11 standard. // See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case std::atomic_thread_fence(std::memory_order_acquire); // Increment optimistic counter, then check if it went over the boundary auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); // Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever // incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now // have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon // incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount. // However, we can't assert this since both dequeueOptimisticCount and dequeueOvercommit may (independently) // overflow; in such a case, though, the logic still holds since the difference between the two is maintained. // Note that we reload tail here in case it changed; it will be the same value as before or greater, since // this load is sequenced after (happens after) the earlier load above. This is supported by read-read // coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order tail = this->tailIndex.load(std::memory_order_acquire); if ((details::likely)(details::circular_less_than(myDequeueCount - overcommit, tail))) { // Guaranteed to be at least one element to dequeue! // Get the index. Note that since there's guaranteed to be at least one element, this // will never exceed tail. We need to do an acquire-release fence here since it's possible // that whatever condition got us to this point was for an earlier enqueued element (that // we already see the memory effects for), but that by the time we increment somebody else // has incremented it, and we need to see the memory effects for *that* element, which is // in such a case is necessarily visible on the thread that incremented it in the first // place with the more current condition (they must have acquired a tail that is at least // as recent). auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); // Determine which block the element is in auto localBlockIndex = blockIndex.load(std::memory_order_acquire); auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); // We need to be careful here about subtracting and dividing because of index wrap-around. 
// When an index wraps, we need to preserve the sign of the offset when dividing it by the // block size (in order to get a correct signed block count offset in all cases): auto headBase = localBlockIndex->entries[localBlockIndexHead].base; auto blockBaseIndex = index & ~static_cast(BLOCK_SIZE - 1); auto offset = static_cast(static_cast::type>(blockBaseIndex - headBase) / static_cast::type>(BLOCK_SIZE)); auto block = localBlockIndex->entries[(localBlockIndexHead + offset) & (localBlockIndex->size - 1)].block; // Dequeue auto& el = *((*block)[index]); if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) { // Make sure the element is still fully dequeued and destroyed even if the assignment // throws struct Guard { Block* block; index_t index; ~Guard() { (*block)[index]->~T(); block->ConcurrentQueue::Block::template set_empty(index); } } guard = { block, index }; element = std::move(el); // NOLINT } else { element = std::move(el); // NOLINT el.~T(); // NOLINT block->ConcurrentQueue::Block::template set_empty(index); } return true; } else { // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent this->dequeueOvercommit.fetch_add(1, std::memory_order_release); // Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write } } return false; } template bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, size_t count) { // First, we need to make sure we have enough room to enqueue all of the elements; // this means pre-allocating blocks and putting them in the block index (but only if // all the allocations succeeded). index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); auto startBlock = this->tailBlock; auto originalBlockIndexFront = pr_blockIndexFront; auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; Block* firstAllocatedBlock = nullptr; // Figure out how many blocks we'll need to allocate, and do so size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); if (blockBaseDiff > 0) { // Allocate as many blocks as possible from ahead while (blockBaseDiff > 0 && this->tailBlock != nullptr && this->tailBlock->next != firstAllocatedBlock && this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) { blockBaseDiff -= static_cast(BLOCK_SIZE); currentTailIndex += static_cast(BLOCK_SIZE); this->tailBlock = this->tailBlock->next; firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
this->tailBlock : firstAllocatedBlock; auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; entry.base = currentTailIndex; entry.block = this->tailBlock; pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); } // Now allocate as many blocks as necessary from the block pool while (blockBaseDiff > 0) { blockBaseDiff -= static_cast(BLOCK_SIZE); currentTailIndex += static_cast(BLOCK_SIZE); auto head = this->headIndex.load(std::memory_order_relaxed); assert(!details::circular_less_than(currentTailIndex, head)); bool full = !details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize || full) { MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { // Failed to allocate, undo changes (but keep injected blocks) pr_blockIndexFront = originalBlockIndexFront; pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; return false; } else if (full || !new_block_index(originalBlockIndexSlotsUsed)) { // Failed to allocate, undo changes (but keep injected blocks) pr_blockIndexFront = originalBlockIndexFront; pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; return false; } // pr_blockIndexFront is updated inside new_block_index, so we need to // update our fallback value too (since we keep the new index even if we // later fail) originalBlockIndexFront = originalBlockIndexSlotsUsed; } // Insert a new block in the circular linked list auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); if (newBlock == nullptr) { pr_blockIndexFront = originalBlockIndexFront; pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; return false; } #ifdef MCDBGQ_TRACKMEM newBlock->owner = this; #endif newBlock->ConcurrentQueue::Block::template set_all_empty(); if (this->tailBlock == nullptr) { newBlock->next = newBlock; } else { newBlock->next = this->tailBlock->next; this->tailBlock->next = newBlock; } this->tailBlock = newBlock; firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock; ++pr_blockIndexSlotsUsed; auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; entry.base = currentTailIndex; entry.block = this->tailBlock; pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); } // Excellent, all allocations succeeded. 
Reset each block's emptiness before we fill them up, and // publish the new block index front auto block = firstAllocatedBlock; while (true) { block->ConcurrentQueue::Block::template reset_empty(); if (block == this->tailBlock) { break; } block = block->next; } MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); } } // Enqueue, one block at a time index_t newTailIndex = startTailIndex + static_cast(count); currentTailIndex = startTailIndex; auto endBlock = this->tailBlock; this->tailBlock = startBlock; assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0); if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) { this->tailBlock = firstAllocatedBlock; } while (true) { index_t stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); if (details::circular_less_than(newTailIndex, stopIndex)) { stopIndex = newTailIndex; } MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { while (currentTailIndex != stopIndex) { new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); } } else { MOODYCAMEL_TRY { while (currentTailIndex != stopIndex) { // Must use copy constructor even if move constructor is available // because we may have to revert if there's an exception. // Sorry about the horrible templated next line, but it was the only way // to disable moving *at compile time*, which is important because a type // may only define a (noexcept) move constructor, and so calls to the // cctor will not compile, even if they are in an if branch that will never // be executed new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); ++currentTailIndex; ++itemFirst; } } MOODYCAMEL_CATCH (...) { // Oh dear, an exception's been thrown -- destroy the elements that // were enqueued so far and revert the entire bulk operation (we'll keep // any allocated blocks in our linked list for later, though). auto constructedStopIndex = currentTailIndex; auto lastBlockEnqueued = this->tailBlock; pr_blockIndexFront = originalBlockIndexFront; pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; this->tailBlock = startBlock == nullptr ? 
firstAllocatedBlock : startBlock; if (!details::is_trivially_destructible::value) { auto block = startBlock; if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { block = firstAllocatedBlock; } currentTailIndex = startTailIndex; while (true) { stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); if (details::circular_less_than(constructedStopIndex, stopIndex)) { stopIndex = constructedStopIndex; } while (currentTailIndex != stopIndex) { (*block)[currentTailIndex++]->~T(); } if (block == lastBlockEnqueued) { break; } block = block->next; } } MOODYCAMEL_RETHROW; } } if (this->tailBlock == endBlock) { assert(currentTailIndex == newTailIndex); break; } this->tailBlock = this->tailBlock->next; } MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { if (firstAllocatedBlock != nullptr) blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), std::memory_order_release); } this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } template size_t dequeue_bulk(It& itemFirst, size_t max) { auto tail = this->tailIndex.load(std::memory_order_relaxed); auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); auto desiredCount = static_cast(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit)); if (details::circular_less_than(0, desiredCount)) { desiredCount = desiredCount < max ? desiredCount : max; std::atomic_thread_fence(std::memory_order_acquire); auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); tail = this->tailIndex.load(std::memory_order_acquire); auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); if (details::circular_less_than(0, actualCount)) { actualCount = desiredCount < actualCount ? desiredCount : actualCount; if (actualCount < desiredCount) { this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); } // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this // will never exceed tail. auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); // Determine which block the first element is in auto localBlockIndex = blockIndex.load(std::memory_order_acquire); auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); auto headBase = localBlockIndex->entries[localBlockIndexHead].base; auto firstBlockBaseIndex = firstIndex & ~static_cast(BLOCK_SIZE - 1); auto offset = static_cast(static_cast::type>(firstBlockBaseIndex - headBase) / static_cast::type>(BLOCK_SIZE)); auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1); // Iterate the blocks and dequeue auto index = firstIndex; do { auto firstIndexInBlock = index; index_t endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? 
firstIndex + static_cast(actualCount) : endIndex; auto block = localBlockIndex->entries[indexIndex].block; if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { while (index != endIndex) { auto& el = *((*block)[index]); *itemFirst++ = std::move(el); el.~T(); ++index; } } else { MOODYCAMEL_TRY { while (index != endIndex) { auto& el = *((*block)[index]); *itemFirst = std::move(el); ++itemFirst; el.~T(); ++index; } } MOODYCAMEL_CATCH (...) { // It's too late to revert the dequeue, but we can make sure that all // the dequeued objects are properly destroyed and the block index // (and empty count) are properly updated before we propagate the exception do { block = localBlockIndex->entries[indexIndex].block; while (index != endIndex) { (*block)[index++]->~T(); } block->ConcurrentQueue::Block::template set_many_empty(firstIndexInBlock, static_cast(endIndex - firstIndexInBlock)); indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); firstIndexInBlock = index; endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; } while (index != firstIndex + actualCount); MOODYCAMEL_RETHROW; } } block->ConcurrentQueue::Block::template set_many_empty(firstIndexInBlock, static_cast(endIndex - firstIndexInBlock)); indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); } while (index != firstIndex + actualCount); return actualCount; } else { // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); } } return 0; } private: struct BlockIndexEntry { index_t base; Block* block; }; struct BlockIndexHeader { size_t size; std::atomic front; // Current slot (not next, like pr_blockIndexFront) BlockIndexEntry* entries; void* prev; }; bool new_block_index(size_t numberOfFilledSlotsToExpose) { auto prevBlockSizeMask = pr_blockIndexSize - 1; // Create the new block pr_blockIndexSize <<= 1; auto newRawPtr = static_cast((Traits::malloc)(sizeof(BlockIndexHeader) + std::alignment_of::value - 1 + sizeof(BlockIndexEntry) * pr_blockIndexSize)); if (newRawPtr == nullptr) { pr_blockIndexSize >>= 1; // Reset to allow graceful retry return false; } auto newBlockIndexEntries = reinterpret_cast(details::align_for(newRawPtr + sizeof(BlockIndexHeader))); // Copy in all the old indices, if any size_t j = 0; if (pr_blockIndexSlotsUsed != 0) { auto i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & prevBlockSizeMask; do { newBlockIndexEntries[j++] = pr_blockIndexEntries[i]; i = (i + 1) & prevBlockSizeMask; } while (i != pr_blockIndexFront); } // Update everything auto header = new (newRawPtr) BlockIndexHeader; header->size = pr_blockIndexSize; header->front.store(numberOfFilledSlotsToExpose - 1, std::memory_order_relaxed); header->entries = newBlockIndexEntries; header->prev = pr_blockIndexRaw; // we link the new block to the old one so we can free it later pr_blockIndexFront = j; pr_blockIndexEntries = newBlockIndexEntries; pr_blockIndexRaw = newRawPtr; blockIndex.store(header, std::memory_order_release); return true; } private: std::atomic blockIndex; // To be used by producer only -- consumer must use the ones in referenced by blockIndex size_t pr_blockIndexSlotsUsed; size_t pr_blockIndexSize; size_t pr_blockIndexFront; // Next slot (not current) BlockIndexEntry* pr_blockIndexEntries; void* 
pr_blockIndexRaw; #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG public: ExplicitProducer* nextExplicitProducer; private: #endif #ifdef MCDBGQ_TRACKMEM friend struct MemStats; #endif }; ////////////////////////////////// // Implicit queue ////////////////////////////////// struct ImplicitProducer : public ProducerBase { ImplicitProducer(ConcurrentQueue* parent_) : ProducerBase(parent_, false), nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE), blockIndex(nullptr) { new_block_index(); } ~ImplicitProducer() override { // Note that since we're in the destructor we can assume that all enqueue/dequeue operations // completed already; this means that all undequeued elements are placed contiguously across // contiguous blocks, and that only the first and last remaining blocks can be only partially // empty (all other remaining blocks must be completely full). #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED // Unregister ourselves for thread termination notification if (!this->inactive.load(std::memory_order_relaxed)) { details::ThreadExitNotifier::unsubscribe(&threadExitListener); } #endif // Destroy all remaining elements! auto tail = this->tailIndex.load(std::memory_order_relaxed); auto index = this->headIndex.load(std::memory_order_relaxed); Block* block = nullptr; assert(index == tail || details::circular_less_than(index, tail)); bool forceFreeLastBlock = index != tail; // If we enter the loop, then the last (tail) block will not be freed while (index != tail) { if ((index & static_cast(BLOCK_SIZE - 1)) == 0 || block == nullptr) { if (block != nullptr) { // Free the old block this->parent->add_block_to_free_list(block); } block = get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed); } ((*block)[index])->~T(); ++index; } // Even if the queue is empty, there's still one block that's not on the free list // (unless the head index reached the end of it, in which case the tail will be poised // to create a new block). 
if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast(BLOCK_SIZE - 1)) != 0)) { this->parent->add_block_to_free_list(this->tailBlock); } // Destroy block index auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); if (localBlockIndex != nullptr) { for (size_t i = 0; i != localBlockIndex->capacity; ++i) { localBlockIndex->index[i]->~BlockIndexEntry(); } do { auto prev = localBlockIndex->prev; localBlockIndex->~BlockIndexHeader(); (Traits::free)(localBlockIndex); localBlockIndex = prev; } while (localBlockIndex != nullptr); } } template inline bool enqueue(U&& element) { index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); index_t newTailIndex = 1 + currentTailIndex; if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { // We reached the end of a block, start a new one auto head = this->headIndex.load(std::memory_order_relaxed); assert(!details::circular_less_than(currentTailIndex, head)); if (!details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { return false; } #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX debug::DebugLock lock(mutex); #endif // Find out where we'll be inserting this block in the block index BlockIndexEntry* idxEntry; if (!insert_block_index_entry(idxEntry, currentTailIndex)) { return false; } // Get ahold of a new block auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); if (newBlock == nullptr) { rewind_block_index_tail(); idxEntry->value.store(nullptr, std::memory_order_relaxed); return false; } #ifdef MCDBGQ_TRACKMEM newBlock->owner = this; #endif newBlock->ConcurrentQueue::Block::template reset_empty(); MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { // May throw, try to insert now before we publish the fact that we have this new block MOODYCAMEL_TRY { new ((*newBlock)[currentTailIndex]) T(std::forward(element)); } MOODYCAMEL_CATCH (...) 
{ rewind_block_index_tail(); idxEntry->value.store(nullptr, std::memory_order_relaxed); this->parent->add_block_to_free_list(newBlock); MOODYCAMEL_RETHROW; } } // Insert the new block into the index idxEntry->value.store(newBlock, std::memory_order_relaxed); this->tailBlock = newBlock; MOODYCAMEL_CONSTEXPR_IF (!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } } // Enqueue new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } template bool dequeue(U& element) { // See ExplicitProducer::dequeue for rationale and explanation index_t tail = this->tailIndex.load(std::memory_order_relaxed); index_t overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); if (details::circular_less_than(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { std::atomic_thread_fence(std::memory_order_acquire); index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); tail = this->tailIndex.load(std::memory_order_acquire); if ((details::likely)(details::circular_less_than(myDequeueCount - overcommit, tail))) { index_t index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); // Determine which block the element is in auto entry = get_block_index_entry_for_index(index); // Dequeue auto block = entry->value.load(std::memory_order_relaxed); auto& el = *((*block)[index]); if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, element = std::move(el))) { #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX // Note: Acquiring the mutex with every dequeue instead of only when a block // is released is very sub-optimal, but it is, after all, purely debug code. debug::DebugLock lock(producer->mutex); #endif struct Guard { Block* block; index_t index; BlockIndexEntry* entry; ConcurrentQueue* parent; ~Guard() { (*block)[index]->~T(); if (block->ConcurrentQueue::Block::template set_empty(index)) { entry->value.store(nullptr, std::memory_order_relaxed); parent->add_block_to_free_list(block); } } } guard = { block, index, entry, this->parent }; element = std::move(el); // NOLINT } else { element = std::move(el); // NOLINT el.~T(); // NOLINT if (block->ConcurrentQueue::Block::template set_empty(index)) { { #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX debug::DebugLock lock(mutex); #endif // Add the block back into the global free pool (and remove from block index) entry->value.store(nullptr, std::memory_order_relaxed); } this->parent->add_block_to_free_list(block); // releases the above store } } return true; } else { this->dequeueOvercommit.fetch_add(1, std::memory_order_release); } } return false; } #ifdef _MSC_VER #pragma warning(push) #pragma warning(disable: 4706) // assignment within conditional expression #endif template bool enqueue_bulk(It itemFirst, size_t count) { // First, we need to make sure we have enough room to enqueue all of the elements; // this means pre-allocating blocks and putting them in the block index (but only if // all the allocations succeeded). // Note that the tailBlock we start off with may not be owned by us any more; // this happens if it was filled up exactly to the top (setting tailIndex to // the first index of the next block which is not yet allocated), then dequeued // completely (putting it on the free list) before we enqueue again. 
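// NOTE (illustrative worked example, not part of the original implementation): the
// blockBaseDiff computation just below counts how many *new* blocks this bulk enqueue
// needs by comparing the block base of the last element against the block base of the
// slot before startTailIndex. E.g. with BLOCK_SIZE = 32, startTailIndex = 40 and
// count = 100:
//     ((40 + 100 - 1) & ~31) = 128,  ((40 - 1) & ~31) = 32,  blockBaseDiff = 96
// i.e. 96 / 32 = 3 additional blocks beyond the one already containing index 39, which
// matches the elements landing at indices 40..139 (blocks [32,64), [64,96), [96,128),
// [128,160)).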
index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); auto startBlock = this->tailBlock; Block* firstAllocatedBlock = nullptr; auto endBlock = this->tailBlock; // Figure out how many blocks we'll need to allocate, and do so size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) - ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); if (blockBaseDiff > 0) { #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX debug::DebugLock lock(mutex); #endif do { blockBaseDiff -= static_cast(BLOCK_SIZE); currentTailIndex += static_cast(BLOCK_SIZE); // Find out where we'll be inserting this block in the block index BlockIndexEntry* idxEntry = nullptr; // initialization here unnecessary but compiler can't always tell Block* newBlock; bool indexInserted = false; auto head = this->headIndex.load(std::memory_order_relaxed); assert(!details::circular_less_than(currentTailIndex, head)); bool full = !details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); if (full || !(indexInserted = insert_block_index_entry(idxEntry, currentTailIndex)) || (newBlock = this->parent->ConcurrentQueue::template requisition_block()) == nullptr) { // Index allocation or block allocation failed; revert any other allocations // and index insertions done so far for this operation if (indexInserted) { rewind_block_index_tail(); idxEntry->value.store(nullptr, std::memory_order_relaxed); } currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { currentTailIndex += static_cast(BLOCK_SIZE); idxEntry = get_block_index_entry_for_index(currentTailIndex); idxEntry->value.store(nullptr, std::memory_order_relaxed); rewind_block_index_tail(); } this->parent->add_blocks_to_free_list(firstAllocatedBlock); this->tailBlock = startBlock; return false; } #ifdef MCDBGQ_TRACKMEM newBlock->owner = this; #endif newBlock->ConcurrentQueue::Block::template reset_empty(); newBlock->next = nullptr; // Insert the new block into the index idxEntry->value.store(newBlock, std::memory_order_relaxed); // Store the chain of blocks so that we can undo if later allocations fail, // and so that we can find the blocks when we do the actual enqueueing if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr) { assert(this->tailBlock != nullptr); this->tailBlock->next = newBlock; } this->tailBlock = newBlock; endBlock = newBlock; firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
newBlock : firstAllocatedBlock; } while (blockBaseDiff > 0); } // Enqueue, one block at a time index_t newTailIndex = startTailIndex + static_cast(count); currentTailIndex = startTailIndex; this->tailBlock = startBlock; assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0); if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) { this->tailBlock = firstAllocatedBlock; } while (true) { index_t stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); if (details::circular_less_than(newTailIndex, stopIndex)) { stopIndex = newTailIndex; } MOODYCAMEL_CONSTEXPR_IF (MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { while (currentTailIndex != stopIndex) { new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); } } else { MOODYCAMEL_TRY { while (currentTailIndex != stopIndex) { new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if(nullptr)) T(details::deref_noexcept(itemFirst)))>::eval(*itemFirst)); ++currentTailIndex; ++itemFirst; } } MOODYCAMEL_CATCH (...) { auto constructedStopIndex = currentTailIndex; auto lastBlockEnqueued = this->tailBlock; if (!details::is_trivially_destructible::value) { auto block = startBlock; if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { block = firstAllocatedBlock; } currentTailIndex = startTailIndex; while (true) { stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); if (details::circular_less_than(constructedStopIndex, stopIndex)) { stopIndex = constructedStopIndex; } while (currentTailIndex != stopIndex) { (*block)[currentTailIndex++]->~T(); } if (block == lastBlockEnqueued) { break; } block = block->next; } } currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { currentTailIndex += static_cast(BLOCK_SIZE); auto idxEntry = get_block_index_entry_for_index(currentTailIndex); idxEntry->value.store(nullptr, std::memory_order_relaxed); rewind_block_index_tail(); } this->parent->add_blocks_to_free_list(firstAllocatedBlock); this->tailBlock = startBlock; MOODYCAMEL_RETHROW; } } if (this->tailBlock == endBlock) { assert(currentTailIndex == newTailIndex); break; } this->tailBlock = this->tailBlock->next; } this->tailIndex.store(newTailIndex, std::memory_order_release); return true; } #ifdef _MSC_VER #pragma warning(pop) #endif template size_t dequeue_bulk(It& itemFirst, size_t max) { auto tail = this->tailIndex.load(std::memory_order_relaxed); auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); auto desiredCount = static_cast(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit)); if (details::circular_less_than(0, desiredCount)) { desiredCount = desiredCount < max ? desiredCount : max; std::atomic_thread_fence(std::memory_order_acquire); auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); tail = this->tailIndex.load(std::memory_order_acquire); auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); if (details::circular_less_than(0, actualCount)) { actualCount = desiredCount < actualCount ? desiredCount : actualCount; if (actualCount < desiredCount) { this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); } // Get the first index. 
Note that since there's guaranteed to be at least actualCount elements, this // will never exceed tail. auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); // Iterate the blocks and dequeue auto index = firstIndex; BlockIndexHeader* localBlockIndex; auto indexIndex = get_block_index_index_for_index(index, localBlockIndex); do { auto blockStartIndex = index; index_t endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; auto entry = localBlockIndex->index[indexIndex]; auto block = entry->value.load(std::memory_order_relaxed); if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T&&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { while (index != endIndex) { auto& el = *((*block)[index]); *itemFirst++ = std::move(el); el.~T(); ++index; } } else { MOODYCAMEL_TRY { while (index != endIndex) { auto& el = *((*block)[index]); *itemFirst = std::move(el); ++itemFirst; el.~T(); ++index; } } MOODYCAMEL_CATCH (...) { do { entry = localBlockIndex->index[indexIndex]; block = entry->value.load(std::memory_order_relaxed); while (index != endIndex) { (*block)[index++]->~T(); } if (block->ConcurrentQueue::Block::template set_many_empty(blockStartIndex, static_cast(endIndex - blockStartIndex))) { #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX debug::DebugLock lock(mutex); #endif entry->value.store(nullptr, std::memory_order_relaxed); this->parent->add_block_to_free_list(block); } indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); blockStartIndex = index; endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), endIndex) ? firstIndex + static_cast(actualCount) : endIndex; } while (index != firstIndex + actualCount); MOODYCAMEL_RETHROW; } } if (block->ConcurrentQueue::Block::template set_many_empty(blockStartIndex, static_cast(endIndex - blockStartIndex))) { { #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX debug::DebugLock lock(mutex); #endif // Note that the set_many_empty above did a release, meaning that anybody who acquires the block // we're about to free can use it safely since our writes (and reads!) will have happened-before then. 
entry->value.store(nullptr, std::memory_order_relaxed); } this->parent->add_block_to_free_list(block); // releases the above store } indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); } while (index != firstIndex + actualCount); return actualCount; } else { this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); } } return 0; } private: // The block size must be > 1, so any number with the low bit set is an invalid block base index static const index_t INVALID_BLOCK_BASE = 1; struct BlockIndexEntry { std::atomic key; std::atomic value; }; struct BlockIndexHeader { size_t capacity; std::atomic tail; BlockIndexEntry* entries; BlockIndexEntry** index; BlockIndexHeader* prev; }; template inline bool insert_block_index_entry(BlockIndexEntry*& idxEntry, index_t blockStartIndex) { auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); // We're the only writer thread, relaxed is OK if (localBlockIndex == nullptr) { return false; // this can happen if new_block_index failed in the constructor } size_t newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); idxEntry = localBlockIndex->index[newTail]; if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE || idxEntry->value.load(std::memory_order_relaxed) == nullptr) { idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); localBlockIndex->tail.store(newTail, std::memory_order_release); return true; } // No room in the old block index, try to allocate another one! MOODYCAMEL_CONSTEXPR_IF (allocMode == CannotAlloc) { return false; } else if (!new_block_index()) { return false; } else { localBlockIndex = blockIndex.load(std::memory_order_relaxed); newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); idxEntry = localBlockIndex->index[newTail]; assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE); idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); localBlockIndex->tail.store(newTail, std::memory_order_release); return true; } } inline void rewind_block_index_tail() { auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); localBlockIndex->tail.store((localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & (localBlockIndex->capacity - 1), std::memory_order_relaxed); } inline BlockIndexEntry* get_block_index_entry_for_index(index_t index) const { BlockIndexHeader* localBlockIndex; auto idx = get_block_index_index_for_index(index, localBlockIndex); return localBlockIndex->index[idx]; } inline size_t get_block_index_index_for_index(index_t index, BlockIndexHeader*& localBlockIndex) const { #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX debug::DebugLock lock(mutex); #endif index &= ~static_cast(BLOCK_SIZE - 1); localBlockIndex = blockIndex.load(std::memory_order_acquire); auto tail = localBlockIndex->tail.load(std::memory_order_acquire); auto tailBase = localBlockIndex->index[tail]->key.load(std::memory_order_relaxed); assert(tailBase != INVALID_BLOCK_BASE); // Note: Must use division instead of shift because the index may wrap around, causing a negative // offset, whose negativity we want to preserve auto offset = static_cast(static_cast::type>(index - tailBase) / static_cast::type>(BLOCK_SIZE)); size_t idx = (tail + offset) & (localBlockIndex->capacity - 1); assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == index && localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != nullptr); return idx; } 
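// NOTE (illustrative, not part of the original implementation): the signed division in
// get_block_index_index_for_index above matters once indices wrap. As a toy example with
// 8-bit indices and BLOCK_SIZE = 4 (the real index_t is much wider):
//     index = 0,   tailBase = 248  ->  index - tailBase wraps to 8, i.e. +8 signed, offset = +2 blocks
//     index = 248, tailBase = 0    ->  index - tailBase = 248,      i.e. -8 signed, offset = -2 blocks
// A logical shift by log2(BLOCK_SIZE) would turn the second case into a large positive
// offset instead of -2, which is why the offset is computed with a signed divide.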
bool new_block_index() { auto prev = blockIndex.load(std::memory_order_relaxed); size_t prevCapacity = prev == nullptr ? 0 : prev->capacity; auto entryCount = prev == nullptr ? nextBlockIndexCapacity : prevCapacity; auto raw = static_cast((Traits::malloc)( sizeof(BlockIndexHeader) + std::alignment_of::value - 1 + sizeof(BlockIndexEntry) * entryCount + std::alignment_of::value - 1 + sizeof(BlockIndexEntry*) * nextBlockIndexCapacity)); if (raw == nullptr) { return false; } auto header = new (raw) BlockIndexHeader; auto entries = reinterpret_cast(details::align_for(raw + sizeof(BlockIndexHeader))); auto index = reinterpret_cast(details::align_for(reinterpret_cast(entries) + sizeof(BlockIndexEntry) * entryCount)); if (prev != nullptr) { auto prevTail = prev->tail.load(std::memory_order_relaxed); auto prevPos = prevTail; size_t i = 0; do { prevPos = (prevPos + 1) & (prev->capacity - 1); index[i++] = prev->index[prevPos]; } while (prevPos != prevTail); assert(i == prevCapacity); } for (size_t i = 0; i != entryCount; ++i) { new (entries + i) BlockIndexEntry; entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed); index[prevCapacity + i] = entries + i; } header->prev = prev; header->entries = entries; header->index = index; header->capacity = nextBlockIndexCapacity; header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), std::memory_order_relaxed); blockIndex.store(header, std::memory_order_release); nextBlockIndexCapacity <<= 1; return true; } private: size_t nextBlockIndexCapacity; std::atomic blockIndex; #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED public: details::ThreadExitListener threadExitListener; private: #endif #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG public: ImplicitProducer* nextImplicitProducer; private: #endif #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX mutable debug::DebugMutex mutex; #endif #ifdef MCDBGQ_TRACKMEM friend struct MemStats; #endif }; ////////////////////////////////// // Block pool manipulation ////////////////////////////////// void populate_initial_block_list(size_t blockCount) { initialBlockPoolSize = blockCount; if (initialBlockPoolSize == 0) { initialBlockPool = nullptr; return; } initialBlockPool = create_array(blockCount); if (initialBlockPool == nullptr) { initialBlockPoolSize = 0; } for (size_t i = 0; i < initialBlockPoolSize; ++i) { initialBlockPool[i].dynamicallyAllocated = false; } } inline Block* try_get_block_from_initial_pool() { if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= initialBlockPoolSize) { return nullptr; } auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed); return index < initialBlockPoolSize ? 
(initialBlockPool + index) : nullptr; } inline void add_block_to_free_list(Block* block) { #ifdef MCDBGQ_TRACKMEM block->owner = nullptr; #endif if (!Traits::RECYCLE_ALLOCATED_BLOCKS && block->dynamicallyAllocated) { destroy(block); } else { freeList.add(block); } } inline void add_blocks_to_free_list(Block* block) { while (block != nullptr) { auto next = block->next; add_block_to_free_list(block); block = next; } } inline Block* try_get_block_from_free_list() { return freeList.try_get(); } // Gets a free block from one of the memory pools, or allocates a new one (if applicable) template Block* requisition_block() { auto block = try_get_block_from_initial_pool(); if (block != nullptr) { return block; } block = try_get_block_from_free_list(); if (block != nullptr) { return block; } MOODYCAMEL_CONSTEXPR_IF (canAlloc == CanAlloc) { return create(); } else { return nullptr; } } #ifdef MCDBGQ_TRACKMEM public: struct MemStats { size_t allocatedBlocks; size_t usedBlocks; size_t freeBlocks; size_t ownedBlocksExplicit; size_t ownedBlocksImplicit; size_t implicitProducers; size_t explicitProducers; size_t elementsEnqueued; size_t blockClassBytes; size_t queueClassBytes; size_t implicitBlockIndexBytes; size_t explicitBlockIndexBytes; friend class ConcurrentQueue; private: static MemStats getFor(ConcurrentQueue* q) { MemStats stats = { 0 }; stats.elementsEnqueued = q->size_approx(); auto block = q->freeList.head_unsafe(); while (block != nullptr) { ++stats.allocatedBlocks; ++stats.freeBlocks; block = block->freeListNext.load(std::memory_order_relaxed); } for (auto ptr = q->producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { bool implicit = dynamic_cast(ptr) != nullptr; stats.implicitProducers += implicit ? 1 : 0; stats.explicitProducers += implicit ? 
0 : 1; if (implicit) { auto prod = static_cast(ptr); stats.queueClassBytes += sizeof(ImplicitProducer); auto head = prod->headIndex.load(std::memory_order_relaxed); auto tail = prod->tailIndex.load(std::memory_order_relaxed); auto hash = prod->blockIndex.load(std::memory_order_relaxed); if (hash != nullptr) { for (size_t i = 0; i != hash->capacity; ++i) { if (hash->index[i]->key.load(std::memory_order_relaxed) != ImplicitProducer::INVALID_BLOCK_BASE && hash->index[i]->value.load(std::memory_order_relaxed) != nullptr) { ++stats.allocatedBlocks; ++stats.ownedBlocksImplicit; } } stats.implicitBlockIndexBytes += hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry); for (; hash != nullptr; hash = hash->prev) { stats.implicitBlockIndexBytes += sizeof(typename ImplicitProducer::BlockIndexHeader) + hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry*); } } for (; details::circular_less_than(head, tail); head += BLOCK_SIZE) { //auto block = prod->get_block_index_entry_for_index(head); ++stats.usedBlocks; } } else { auto prod = static_cast(ptr); stats.queueClassBytes += sizeof(ExplicitProducer); auto tailBlock = prod->tailBlock; bool wasNonEmpty = false; if (tailBlock != nullptr) { auto block = tailBlock; do { ++stats.allocatedBlocks; if (!block->ConcurrentQueue::Block::template is_empty() || wasNonEmpty) { ++stats.usedBlocks; wasNonEmpty = wasNonEmpty || block != tailBlock; } ++stats.ownedBlocksExplicit; block = block->next; } while (block != tailBlock); } auto index = prod->blockIndex.load(std::memory_order_relaxed); while (index != nullptr) { stats.explicitBlockIndexBytes += sizeof(typename ExplicitProducer::BlockIndexHeader) + index->size * sizeof(typename ExplicitProducer::BlockIndexEntry); index = static_cast(index->prev); } } } auto freeOnInitialPool = q->initialBlockPoolIndex.load(std::memory_order_relaxed) >= q->initialBlockPoolSize ? 0 : q->initialBlockPoolSize - q->initialBlockPoolIndex.load(std::memory_order_relaxed); stats.allocatedBlocks += freeOnInitialPool; stats.freeBlocks += freeOnInitialPool; stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks; stats.queueClassBytes += sizeof(ConcurrentQueue); return stats; } }; // For debugging only. Not thread-safe. MemStats getMemStats() { return MemStats::getFor(this); } private: friend struct MemStats; #endif ////////////////////////////////// // Producer list manipulation ////////////////////////////////// ProducerBase* recycle_or_create_producer(bool isExplicit) { #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH debug::DebugLock lock(implicitProdMutex); #endif // Try to re-use one first for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { if (ptr->inactive.load(std::memory_order_relaxed) && ptr->isExplicit == isExplicit) { bool expected = true; if (ptr->inactive.compare_exchange_strong(expected, /* desired */ false, std::memory_order_acquire, std::memory_order_relaxed)) { // We caught one! It's been marked as activated, the caller can have it return ptr; } } } return add_producer(isExplicit ? 
static_cast(create(this)) : create(this)); } ProducerBase* add_producer(ProducerBase* producer) { // Handle failed memory allocation if (producer == nullptr) { return nullptr; } producerCount.fetch_add(1, std::memory_order_relaxed); // Add it to the lock-free list auto prevTail = producerListTail.load(std::memory_order_relaxed); do { producer->next = prevTail; } while (!producerListTail.compare_exchange_weak(prevTail, producer, std::memory_order_release, std::memory_order_relaxed)); #ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG if (producer->isExplicit) { auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed); do { static_cast(producer)->nextExplicitProducer = prevTailExplicit; } while (!explicitProducers.compare_exchange_weak(prevTailExplicit, static_cast(producer), std::memory_order_release, std::memory_order_relaxed)); } else { auto prevTailImplicit = implicitProducers.load(std::memory_order_relaxed); do { static_cast(producer)->nextImplicitProducer = prevTailImplicit; } while (!implicitProducers.compare_exchange_weak(prevTailImplicit, static_cast(producer), std::memory_order_release, std::memory_order_relaxed)); } #endif return producer; } void reown_producers() { // After another instance is moved-into/swapped-with this one, all the // producers we stole still think their parents are the other queue. // So fix them up! for (auto ptr = producerListTail.load(std::memory_order_relaxed); ptr != nullptr; ptr = ptr->next_prod()) { ptr->parent = this; } } ////////////////////////////////// // Implicit producer hash ////////////////////////////////// struct ImplicitProducerKVP { std::atomic key; ImplicitProducer* value; // No need for atomicity since it's only read by the thread that sets it in the first place ImplicitProducerKVP() : value(nullptr) { } ImplicitProducerKVP(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT { key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed); value = other.value; } inline ImplicitProducerKVP& operator=(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT { swap(other); return *this; } inline void swap(ImplicitProducerKVP& other) MOODYCAMEL_NOEXCEPT { if (this != &other) { details::swap_relaxed(key, other.key); std::swap(value, other.value); } } }; template friend void moodycamel::swap(typename ConcurrentQueue::ImplicitProducerKVP&, typename ConcurrentQueue::ImplicitProducerKVP&) MOODYCAMEL_NOEXCEPT; struct ImplicitProducerHash { size_t capacity; ImplicitProducerKVP* entries; ImplicitProducerHash* prev; }; inline void populate_initial_implicit_producer_hash() { MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { return; } else { implicitProducerHashCount.store(0, std::memory_order_relaxed); auto hash = &initialImplicitProducerHash; hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; hash->entries = &initialImplicitProducerHashEntries[0]; for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) { initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); } hash->prev = nullptr; implicitProducerHash.store(hash, std::memory_order_relaxed); } } void swap_implicit_producer_hashes(ConcurrentQueue& other) { MOODYCAMEL_CONSTEXPR_IF (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { return; } else { // Swap (assumes our implicit producer hash is initialized) initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries); initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0]; other.initialImplicitProducerHash.entries = 
&other.initialImplicitProducerHashEntries[0]; details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount); details::swap_relaxed(implicitProducerHash, other.implicitProducerHash); if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) { implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed); } else { ImplicitProducerHash* hash; for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; hash = hash->prev) { continue; } hash->prev = &initialImplicitProducerHash; } if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) { other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed); } else { ImplicitProducerHash* hash; for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; hash = hash->prev) { continue; } hash->prev = &other.initialImplicitProducerHash; } } } // Only fails (returns nullptr) if memory allocation fails ImplicitProducer* get_or_add_implicit_producer() { // Note that since the data is essentially thread-local (key is thread ID), // there's a reduced need for fences (memory ordering is already consistent // for any individual thread), except for the current table itself. // Start by looking for the thread ID in the current and all previous hash tables. // If it's not found, it must not be in there yet, since this same thread would // have added it previously to one of the tables that we traversed. // Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH debug::DebugLock lock(implicitProdMutex); #endif auto id = details::thread_id(); auto hashedId = details::hash_thread_id(id); auto mainHash = implicitProducerHash.load(std::memory_order_acquire); assert(mainHash != nullptr); // silence clang-tidy and MSVC warnings (hash cannot be null) for (auto hash = mainHash; hash != nullptr; hash = hash->prev) { // Look for the id in this hash auto index = hashedId; while (true) { // Not an infinite loop because at least one slot is free in the hash table index &= hash->capacity - 1u; auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed); if (probedKey == id) { // Found it! If we had to search several hashes deep, though, we should lazily add it // to the current main hash table to avoid the extended search next time. // Note there's guaranteed to be room in the current hash table since every subsequent // table implicitly reserves space for all previous tables (there's only one // implicitProducerHashCount). 
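// NOTE (illustrative sketch, not part of the original implementation): each hash table
// here is probed with plain linear probing over a power-of-two capacity, roughly:
//     size_t i = hashedId;
//     for (;;) {
//       i &= hash->capacity - 1;                         // wrap within the table
//       if (entries[i].key == id)                break;  // found our producer
//       if (entries[i].key == invalid_thread_id) break;  // not in this table, try hash->prev
//       ++i;                                             // probe the next slot
//     }
// Tables are grown well before they fill up, so a free slot always exists and the probe
// terminates; when a hit is found in an older table, the entry is lazily re-added to the
// newest table (just below) so later lookups stay short.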
auto value = hash->entries[index].value; if (hash != mainHash) { index = hashedId; while (true) { index &= mainHash->capacity - 1u; auto empty = details::invalid_thread_id; #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED auto reusable = details::invalid_thread_id2; if (mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_seq_cst, std::memory_order_relaxed) || mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_seq_cst, std::memory_order_relaxed)) { #else if (mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_seq_cst, std::memory_order_relaxed)) { #endif mainHash->entries[index].value = value; break; } ++index; } } return value; } if (probedKey == details::invalid_thread_id) { break; // Not in this hash table } ++index; } } // Insert! auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed); while (true) { // NOLINTNEXTLINE(clang-analyzer-core.NullDereference) if (newCount >= (mainHash->capacity >> 1) && !implicitProducerHashResizeInProgress.test_and_set(std::memory_order_acquire)) { // We've acquired the resize lock, try to allocate a bigger hash table. // Note the acquire fence synchronizes with the release fence at the end of this block, and hence when // we reload implicitProducerHash it must be the most recent version (it only gets changed within this // locked block). mainHash = implicitProducerHash.load(std::memory_order_acquire); if (newCount >= (mainHash->capacity >> 1)) { size_t newCapacity = mainHash->capacity << 1; while (newCount >= (newCapacity >> 1)) { newCapacity <<= 1; } auto raw = static_cast((Traits::malloc)(sizeof(ImplicitProducerHash) + std::alignment_of::value - 1 + sizeof(ImplicitProducerKVP) * newCapacity)); if (raw == nullptr) { // Allocation failed implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); return nullptr; } auto newHash = new (raw) ImplicitProducerHash; newHash->capacity = static_cast(newCapacity); newHash->entries = reinterpret_cast(details::align_for(raw + sizeof(ImplicitProducerHash))); for (size_t i = 0; i != newCapacity; ++i) { new (newHash->entries + i) ImplicitProducerKVP; newHash->entries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); } newHash->prev = mainHash; implicitProducerHash.store(newHash, std::memory_order_release); implicitProducerHashResizeInProgress.clear(std::memory_order_release); mainHash = newHash; } else { implicitProducerHashResizeInProgress.clear(std::memory_order_release); } } // If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table // to finish being allocated by another thread (and if we just finished allocating above, the condition will // always be true) if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) { auto producer = static_cast(recycle_or_create_producer(false)); if (producer == nullptr) { implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); return nullptr; } #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED producer->threadExitListener.callback = &ConcurrentQueue::implicit_producer_thread_exited_callback; producer->threadExitListener.userData = producer; details::ThreadExitNotifier::subscribe(&producer->threadExitListener); #endif auto index = hashedId; while (true) { index &= mainHash->capacity - 1u; auto empty = details::invalid_thread_id; #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED auto reusable = 
details::invalid_thread_id2; if (mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_seq_cst, std::memory_order_relaxed)) { implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); // already counted as a used slot mainHash->entries[index].value = producer; break; } #endif if (mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_seq_cst, std::memory_order_relaxed)) { mainHash->entries[index].value = producer; break; } ++index; } return producer; } // Hmm, the old hash is quite full and somebody else is busy allocating a new one. // We need to wait for the allocating thread to finish (if it succeeds, we add, if not, // we try to allocate ourselves). mainHash = implicitProducerHash.load(std::memory_order_acquire); } } #ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED void implicit_producer_thread_exited(ImplicitProducer* producer) { // Remove from hash #ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH debug::DebugLock lock(implicitProdMutex); #endif auto hash = implicitProducerHash.load(std::memory_order_acquire); assert(hash != nullptr); // The thread exit listener is only registered if we were added to a hash in the first place auto id = details::thread_id(); auto hashedId = details::hash_thread_id(id); details::thread_id_t probedKey; // We need to traverse all the hashes just in case other threads aren't on the current one yet and are // trying to add an entry thinking there's a free slot (because they reused a producer) for (; hash != nullptr; hash = hash->prev) { auto index = hashedId; do { index &= hash->capacity - 1u; probedKey = id; if (hash->entries[index].key.compare_exchange_strong(probedKey, details::invalid_thread_id2, std::memory_order_seq_cst, std::memory_order_relaxed)) { break; } ++index; } while (probedKey != details::invalid_thread_id); // Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place } // Mark the queue as being recyclable producer->inactive.store(true, std::memory_order_release); } static void implicit_producer_thread_exited_callback(void* userData) { auto producer = static_cast(userData); auto queue = producer->parent; queue->implicit_producer_thread_exited(producer); } #endif ////////////////////////////////// // Utility functions ////////////////////////////////// template static inline void* aligned_malloc(size_t size) { MOODYCAMEL_CONSTEXPR_IF (std::alignment_of::value <= std::alignment_of::value) return (Traits::malloc)(size); else { size_t alignment = std::alignment_of::value; void* raw = (Traits::malloc)(size + alignment - 1 + sizeof(void*)); if (!raw) return nullptr; char* ptr = details::align_for(reinterpret_cast(raw) + sizeof(void*)); *(reinterpret_cast(ptr) - 1) = raw; return ptr; } } template static inline void aligned_free(void* ptr) { MOODYCAMEL_CONSTEXPR_IF (std::alignment_of::value <= std::alignment_of::value) return (Traits::free)(ptr); else (Traits::free)(ptr ? *(reinterpret_cast(ptr) - 1) : nullptr); } template static inline U* create_array(size_t count) { assert(count > 0); U* p = static_cast(aligned_malloc(sizeof(U) * count)); if (p == nullptr) return nullptr; for (size_t i = 0; i != count; ++i) new (p + i) U(); return p; } template static inline void destroy_array(U* p, size_t count) { if (p != nullptr) { assert(count > 0); for (size_t i = count; i != 0; ) (p + --i)->~U(); } aligned_free(p); } template static inline U* create() { void* p = aligned_malloc(sizeof(U)); return p != nullptr ? 
new (p) U : nullptr; }
template<typename U, typename A1> static inline U* create(A1&& a1) { void* p = aligned_malloc<U>(sizeof(U)); return p != nullptr ? new (p) U(std::forward<A1>(a1)) : nullptr; }
template<typename U> static inline void destroy(U* p) { if (p != nullptr) p->~U(); aligned_free<U>(p); }
private:
std::atomic<ProducerBase*> producerListTail;
std::atomic<std::uint32_t> producerCount;
std::atomic<size_t> initialBlockPoolIndex;
Block* initialBlockPool;
size_t initialBlockPoolSize;
#ifndef MCDBGQ_USEDEBUGFREELIST
FreeList<Block> freeList;
#else
debug::DebugFreeList<Block> freeList;
#endif
std::atomic<ImplicitProducerHash*> implicitProducerHash;
std::atomic<size_t> implicitProducerHashCount; // Number of slots logically used
ImplicitProducerHash initialImplicitProducerHash;
std::array<ImplicitProducerKVP, INITIAL_IMPLICIT_PRODUCER_HASH_SIZE> initialImplicitProducerHashEntries;
std::atomic_flag implicitProducerHashResizeInProgress;
std::atomic<std::uint32_t> nextExplicitConsumerId;
std::atomic<std::uint32_t> globalExplicitConsumerOffset;
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
debug::DebugMutex implicitProdMutex;
#endif
#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
std::atomic<ExplicitProducer*> explicitProducers;
std::atomic<ImplicitProducer*> implicitProducers;
#endif
};
template<typename T, typename Traits> ProducerToken::ProducerToken(ConcurrentQueue<T, Traits>& queue) : producer(queue.recycle_or_create_producer(true)) { if (producer != nullptr) { producer->token = this; } }
template<typename T, typename Traits> ProducerToken::ProducerToken(BlockingConcurrentQueue<T, Traits>& queue) : producer(reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->recycle_or_create_producer(true)) { if (producer != nullptr) { producer->token = this; } }
template<typename T, typename Traits> ConsumerToken::ConsumerToken(ConcurrentQueue<T, Traits>& queue) : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) { initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release); lastKnownGlobalOffset = static_cast<std::uint32_t>(-1); }
template<typename T, typename Traits> ConsumerToken::ConsumerToken(BlockingConcurrentQueue<T, Traits>& queue) : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) { initialOffset = reinterpret_cast<ConcurrentQueue<T, Traits>*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release); lastKnownGlobalOffset = static_cast<std::uint32_t>(-1); }
template<typename T, typename Traits> inline void swap(ConcurrentQueue<T, Traits>& a, ConcurrentQueue<T, Traits>& b) MOODYCAMEL_NOEXCEPT { a.swap(b); }
inline void swap(ProducerToken& a, ProducerToken& b) MOODYCAMEL_NOEXCEPT { a.swap(b); }
inline void swap(ConsumerToken& a, ConsumerToken& b) MOODYCAMEL_NOEXCEPT { a.swap(b); }
template<typename T, typename Traits> inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& a, typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT { a.swap(b); }
}
#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17)
#pragma warning(pop)
#endif
#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
#pragma GCC diagnostic pop
#endif
================================================
FILE: dispenso/third-party/moodycamel/lightweightsemaphore.h
================================================
// Provides an efficient implementation of a semaphore (LightweightSemaphore).
// This is an extension of Jeff Preshing's semaphore implementation (licensed
// under the terms of its separate zlib license) that has been adapted and
// extended by Cameron Desrochers.
#pragma once
#include <cstddef> // For std::size_t
#include <atomic>
#include <type_traits> // For std::make_signed
#if defined(_WIN32)
// Avoid including windows.h in a header; we only need a handful of
// items, so we'll redeclare them here (this is relatively safe since
// the API generally has to remain stable between Windows versions).
// I know this is an ugly hack but it still beats polluting the global
// namespace with thousands of generic names or adding a .cpp for nothing.
extern "C" { struct _SECURITY_ATTRIBUTES; __declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName); __declspec(dllimport) int __stdcall CloseHandle(void* hObject); __declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds); __declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount); } #elif defined(__MACH__) #include #elif defined(__unix__) #include #if defined(__GLIBC_PREREQ) && defined(_GNU_SOURCE) #if __GLIBC_PREREQ(2,30) #define MOODYCAMEL_LIGHTWEIGHTSEMAPHORE_MONOTONIC #endif #endif #endif namespace moodycamel { namespace details { // Code in the mpmc_sema namespace below is an adaptation of Jeff Preshing's // portable + lightweight semaphore implementations, originally from // https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h // LICENSE: // Copyright (c) 2015 Jeff Preshing // // This software is provided 'as-is', without any express or implied // warranty. In no event will the authors be held liable for any damages // arising from the use of this software. // // Permission is granted to anyone to use this software for any purpose, // including commercial applications, and to alter it and redistribute it // freely, subject to the following restrictions: // // 1. The origin of this software must not be misrepresented; you must not // claim that you wrote the original software. If you use this software // in a product, an acknowledgement in the product documentation would be // appreciated but is not required. // 2. Altered source versions must be plainly marked as such, and must not be // misrepresented as being the original software. // 3. This notice may not be removed or altered from any source distribution. 
#if defined(_WIN32) class Semaphore { private: void* m_hSema; Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; public: Semaphore(int initialCount = 0) { assert(initialCount >= 0); const long maxLong = 0x7fffffff; m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr); assert(m_hSema); } ~Semaphore() { CloseHandle(m_hSema); } bool wait() { const unsigned long infinite = 0xffffffff; return WaitForSingleObject(m_hSema, infinite) == 0; } bool try_wait() { return WaitForSingleObject(m_hSema, 0) == 0; } bool timed_wait(std::uint64_t usecs) { return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) == 0; } void signal(int count = 1) { while (!ReleaseSemaphore(m_hSema, count, nullptr)); } }; #elif defined(__MACH__) //--------------------------------------------------------- // Semaphore (Apple iOS and OSX) // Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html //--------------------------------------------------------- class Semaphore { private: semaphore_t m_sema; Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; public: Semaphore(int initialCount = 0) { assert(initialCount >= 0); kern_return_t rc = semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount); assert(rc == KERN_SUCCESS); (void)rc; } ~Semaphore() { semaphore_destroy(mach_task_self(), m_sema); } bool wait() { return semaphore_wait(m_sema) == KERN_SUCCESS; } bool try_wait() { return timed_wait(0); } bool timed_wait(std::uint64_t timeout_usecs) { mach_timespec_t ts; ts.tv_sec = static_cast(timeout_usecs / 1000000); ts.tv_nsec = static_cast((timeout_usecs % 1000000) * 1000); // added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html kern_return_t rc = semaphore_timedwait(m_sema, ts); return rc == KERN_SUCCESS; } void signal() { while (semaphore_signal(m_sema) != KERN_SUCCESS); } void signal(int count) { while (count-- > 0) { while (semaphore_signal(m_sema) != KERN_SUCCESS); } } }; #elif defined(__unix__) //--------------------------------------------------------- // Semaphore (POSIX, Linux) //--------------------------------------------------------- class Semaphore { private: sem_t m_sema; Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; public: Semaphore(int initialCount = 0) { assert(initialCount >= 0); int rc = sem_init(&m_sema, 0, static_cast(initialCount)); assert(rc == 0); (void)rc; } ~Semaphore() { sem_destroy(&m_sema); } bool wait() { // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error int rc; do { rc = sem_wait(&m_sema); } while (rc == -1 && errno == EINTR); return rc == 0; } bool try_wait() { int rc; do { rc = sem_trywait(&m_sema); } while (rc == -1 && errno == EINTR); return rc == 0; } bool timed_wait(std::uint64_t usecs) { struct timespec ts; const int usecs_in_1_sec = 1000000; const int nsecs_in_1_sec = 1000000000; #ifdef MOODYCAMEL_LIGHTWEIGHTSEMAPHORE_MONOTONIC clock_gettime(CLOCK_MONOTONIC, &ts); #else clock_gettime(CLOCK_REALTIME, &ts); #endif ts.tv_sec += (time_t)(usecs / usecs_in_1_sec); ts.tv_nsec += (long)(usecs % usecs_in_1_sec) * 1000; // sem_timedwait bombs if you have more than 1e9 in tv_nsec // so we have to clean things up before passing 
it in if (ts.tv_nsec >= nsecs_in_1_sec) { ts.tv_nsec -= nsecs_in_1_sec; ++ts.tv_sec; } int rc; do { #ifdef MOODYCAMEL_LIGHTWEIGHTSEMAPHORE_MONOTONIC rc = sem_clockwait(&m_sema, CLOCK_MONOTONIC, &ts); #else rc = sem_timedwait(&m_sema, &ts); #endif } while (rc == -1 && errno == EINTR); return rc == 0; } void signal() { while (sem_post(&m_sema) == -1); } void signal(int count) { while (count-- > 0) { while (sem_post(&m_sema) == -1); } } }; #else #error Unsupported platform! (No semaphore wrapper available) #endif } // end namespace details //--------------------------------------------------------- // LightweightSemaphore //--------------------------------------------------------- class LightweightSemaphore { public: typedef std::make_signed::type ssize_t; private: std::atomic m_count; details::Semaphore m_sema; int m_maxSpins; bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1) { ssize_t oldCount; int spin = m_maxSpins; while (--spin >= 0) { oldCount = m_count.load(std::memory_order_relaxed); if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) return true; std::atomic_signal_fence(std::memory_order_acquire); // Prevent the compiler from collapsing the loop. } oldCount = m_count.fetch_sub(1, std::memory_order_acquire); if (oldCount > 0) return true; if (timeout_usecs < 0) { if (m_sema.wait()) return true; } if (timeout_usecs > 0 && m_sema.timed_wait((std::uint64_t)timeout_usecs)) return true; // At this point, we've timed out waiting for the semaphore, but the // count is still decremented indicating we may still be waiting on // it. So we have to re-adjust the count, but only if the semaphore // wasn't signaled enough times for us too since then. If it was, we // need to release the semaphore too. while (true) { oldCount = m_count.load(std::memory_order_acquire); if (oldCount >= 0 && m_sema.try_wait()) return true; if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed)) return false; } } ssize_t waitManyWithPartialSpinning(ssize_t max, std::int64_t timeout_usecs = -1) { assert(max > 0); ssize_t oldCount; int spin = m_maxSpins; while (--spin >= 0) { oldCount = m_count.load(std::memory_order_relaxed); if (oldCount > 0) { ssize_t newCount = oldCount > max ? 
oldCount - max : 0; if (m_count.compare_exchange_strong(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) return oldCount - newCount; } std::atomic_signal_fence(std::memory_order_acquire); } oldCount = m_count.fetch_sub(1, std::memory_order_acquire); if (oldCount <= 0) { if ((timeout_usecs == 0) || (timeout_usecs < 0 && !m_sema.wait()) || (timeout_usecs > 0 && !m_sema.timed_wait((std::uint64_t)timeout_usecs))) { while (true) { oldCount = m_count.load(std::memory_order_acquire); if (oldCount >= 0 && m_sema.try_wait()) break; if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed)) return 0; } } } if (max > 1) return 1 + tryWaitMany(max - 1); return 1; } public: LightweightSemaphore(ssize_t initialCount = 0, int maxSpins = 10000) : m_count(initialCount), m_maxSpins(maxSpins) { assert(initialCount >= 0); assert(maxSpins >= 0); } bool tryWait() { ssize_t oldCount = m_count.load(std::memory_order_relaxed); while (oldCount > 0) { if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) return true; } return false; } bool wait() { return tryWait() || waitWithPartialSpinning(); } bool wait(std::int64_t timeout_usecs) { return tryWait() || waitWithPartialSpinning(timeout_usecs); } // Acquires between 0 and (greedily) max, inclusive ssize_t tryWaitMany(ssize_t max) { assert(max >= 0); ssize_t oldCount = m_count.load(std::memory_order_relaxed); while (oldCount > 0) { ssize_t newCount = oldCount > max ? oldCount - max : 0; if (m_count.compare_exchange_weak(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) return oldCount - newCount; } return 0; } // Acquires at least one, and (greedily) at most max ssize_t waitMany(ssize_t max, std::int64_t timeout_usecs) { assert(max >= 0); ssize_t result = tryWaitMany(max); if (result == 0 && max > 0) result = waitManyWithPartialSpinning(max, timeout_usecs); return result; } ssize_t waitMany(ssize_t max) { ssize_t result = waitMany(max, -1); assert(result > 0); return result; } void signal(ssize_t count = 1) { assert(count >= 0); ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release); ssize_t toRelease = -oldCount < count ? -oldCount : count; if (toRelease > 0) { m_sema.signal((int)toRelease); } } std::size_t availableApprox() const { ssize_t count = m_count.load(std::memory_order_relaxed); return count > 0 ? static_cast(count) : 0; } }; } // end namespace moodycamel ================================================ FILE: dispenso/thread_id.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include namespace dispenso { std::atomic nextThread{0}; constexpr uint64_t kInvalidThread = std::numeric_limits::max(); DISPENSO_THREAD_LOCAL uint64_t currentThread = kInvalidThread; uint64_t threadId() { if (currentThread == kInvalidThread) { currentThread = nextThread.fetch_add(uint64_t{1}, std::memory_order_relaxed); } return currentThread; } } // namespace dispenso ================================================ FILE: dispenso/thread_id.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
*/ /** * @file thread_id.h * @ingroup group_util * Utilities for getting a unique thread identifier. **/ #pragma once #include namespace dispenso { /** * Get the current thread's identifier, unique within the current process. * * @return An integer representing the current thread. * * @note Thread IDs are assumed to not be reused over the lifetime of a process, but this should * still enable processes running for thousands of years, even with very poor spawn/kill thread * patterns. * * @note If thread ID is needed for cross-process synchronization, one must fall back on * system-specific thread IDs. **/ DISPENSO_DLL_ACCESS uint64_t threadId(); } // namespace dispenso ================================================ FILE: dispenso/thread_pool.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include #include #if defined DISPENSO_DEBUG #include #endif // DISPENSO_DEBUG namespace dispenso { namespace { size_t getAdjustedThreadCount(size_t requested) { static const size_t maxThreads = []() { size_t maxT = std::numeric_limits::max(); #if defined(_WIN32) && !defined(__MINGW32__) #pragma warning(push) #pragma warning(disable : 4996) #endif char* envThreads = std::getenv("DISPENSO_MAX_THREADS_PER_POOL"); #if defined(_WIN32) && !defined(__MINGW32__) #pragma warning(pop) #endif if (envThreads) { char* end = nullptr; maxT = std::strtoul(envThreads, &end, 10); #if defined DISPENSO_DEBUG std::cout << "DISPENSO_MAX_THREADS_PER_POOL = " << maxT << std::endl; #endif // DISPENSO_DEBUG } return maxT; }(); return std::min(requested, maxThreads); } } // namespace void ThreadPool::PerThreadData::setThread(std::thread&& t) { thread_ = std::move(t); } void ThreadPool::PerThreadData::stop() { running_.store(false, std::memory_order_release); } uint32_t ThreadPool::wait(uint32_t currentEpoch) { if (sleepLengthUs_ > 0) { return epochWaiter_.waitFor(currentEpoch, sleepLengthUs_.load(std::memory_order_acquire)); } else { return epochWaiter_.current(); } } void ThreadPool::wake() { epochWaiter_.bumpAndWake(); } inline bool ThreadPool::PerThreadData::running() { return running_.load(std::memory_order_acquire); } ThreadPool::ThreadPool(size_t n, size_t poolLoadMultiplier) : poolLoadMultiplier_(poolLoadMultiplier), poolLoadFactor_(static_cast(getAdjustedThreadCount(n) * poolLoadMultiplier)), numThreads_(static_cast(getAdjustedThreadCount(n))) { detail::registerFineSchedulerQuanta(); #if defined DISPENSO_DEBUG assert(poolLoadMultiplier > 0); #endif // DISPENSO_DEBUG for (size_t i = 0; i < static_cast(numThreads_); ++i) { threads_.emplace_back(); threads_.back().setThread(std::thread([this, &back = threads_.back()]() { threadLoop(back); })); } } ThreadPool::PerThreadData::~PerThreadData() {} void ThreadPool::threadLoop(PerThreadData& data) { // On Windows, wake syscalls (WakeByAddressSingle) are expensive per-thread // kernel transitions, so spin longer before sleeping to avoid costly // sleep/wake cycles. 
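  // Backoff progression used below (descriptive summary): each pass that dequeues nothing
  // bumps failCount and issues cpuRelax(); past kBackoffYield failures the thread also yields
  // its timeslice, and past kBackoffSleep failures it typically parks on epochWaiter_
  // (bounded by sleepLengthUs_) until the epoch is bumped or the wait times out.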
#if defined(_WIN32) static constexpr int kBackoffYield = 100; static constexpr int kBackoffSleep = kBackoffYield + 20; #else static constexpr int kBackoffYield = 50; static constexpr int kBackoffSleep = kBackoffYield + 5; #endif moodycamel::ConsumerToken ctoken(work_); moodycamel::ProducerToken ptoken(work_); OnceFunction next; int failCount = 0; detail::PerPoolPerThreadInfo::registerPool(this, &ptoken); uint32_t epoch = epochWaiter_.current(); if (enableEpochWaiter_) { while (data.running()) { int localWorkDone = 0; while (true) { DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); bool got = work_.try_dequeue(ctoken, next); DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); if (!got) { break; } OnceFunction task(std::move(next)); task(); ++localWorkDone; if (localWorkDone >= kWorkBatchSize) { workRemaining_.fetch_sub(localWorkDone, std::memory_order_relaxed); localWorkDone = 0; } failCount = 0; } if (localWorkDone > 0) { workRemaining_.fetch_sub(localWorkDone, std::memory_order_relaxed); } ++failCount; detail::cpuRelax(); if (failCount > kBackoffSleep) { numSleeping_.fetch_add(1, std::memory_order_acq_rel); epoch = wait(epoch); numSleeping_.fetch_sub(1, std::memory_order_acq_rel); failCount = 0; } else if (failCount > kBackoffYield) { std::this_thread::yield(); } } } else { while (data.running()) { while (true) { DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); bool got = work_.try_dequeue(ctoken, next); DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); if (!got) { break; } executeNext(std::move(next)); failCount = 0; } ++failCount; detail::cpuRelax(); if (failCount > kBackoffSleep) { epoch = wait(epoch); } else if (failCount > kBackoffYield) { std::this_thread::yield(); } } } } void ThreadPool::resizeLocked(ssize_t sn) { sn = getAdjustedThreadCount(sn); assert(sn >= 0); size_t n = static_cast(sn); if (n < threads_.size()) { for (size_t i = n; i < threads_.size(); ++i) { threads_[i].stop(); } while (threads_.size() > n) { wake(); threads_.back().thread_.join(); threads_.pop_back(); } } else if (n > threads_.size()) { for (size_t i = threads_.size(); i < n; ++i) { threads_.emplace_back(); threads_.back().setThread( std::thread([this, &back = threads_.back()]() { threadLoop(back); })); } } poolLoadFactor_.store(static_cast(n * poolLoadMultiplier_), std::memory_order_relaxed); numThreads_.store(sn, std::memory_order_relaxed); if (!sn) { // Pool will run future tasks inline since we have no threads, but we still need to empty // current set of tasks while (tryExecuteNext()) { } } } ThreadPool::~ThreadPool() { #if defined DISPENSO_DEBUG assert(outstandingTaskSets_.load(std::memory_order_acquire) == 0); #endif // DISPENSO_DEBUG // Strictly speaking, it is unnecessary to lock this in the destructor; however, it could be a // useful diagnostic to learn that the mutex is already locked when we reach this point. std::unique_lock lk(threadsMutex_, std::try_to_lock); assert(lk.owns_lock()); // Mark all threads as stopped first, then wake them all at once. The previous // approach (stop + wake one at a time) was fragile: a wake() could reach an // already-awake thread while another thread remained sleeping, forcing it to // wait for the epoch timeout (e.g. 30ms) to notice the stop flag. Under TSAN // this caused severe slowdowns in ThreadPool destruction. 
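  // Shutdown sequence below, in order: flag every worker as stopped, issue one batched wake
  // for all of them, drain any still-queued work on this thread, join the workers, then drain
  // once more in case a worker enqueued follow-up tasks before exiting.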
for (auto& t : threads_) { t.stop(); } ssize_t numThreads = static_cast(threads_.size()); if (numThreads > 0) { wakeN(numThreads); } while (tryExecuteNext()) { } for (auto& t : threads_) { t.thread_.join(); } threads_.clear(); while (tryExecuteNext()) { } } ThreadPool& globalThreadPool() { // It should be illegal to access globalThreadPool after exiting main. // We default to hardware threads minus one because the calling thread usually is involved in // computation. static ThreadPool pool(std::thread::hardware_concurrency() - 1); return pool; } void resizeGlobalThreadPool(size_t numThreads) { globalThreadPool().resize(static_cast(numThreads)); } void ThreadPool::wakeN(ssize_t n) { #if defined(_WIN32) // On Windows, always use WakeAll. WakeByAddressSingle has no batch wake // count, so multiple calls means O(N) kernel transitions. A single WakeAll // is one transition and keeps threads in their spin phase (kBackoffYield // iterations) where they can absorb follow-up work without re-waking. (void)n; epochWaiter_.bumpAndWakeAll(); #else ssize_t sleeping = numSleeping_.load(std::memory_order_relaxed); constexpr unsigned kWakeAllMultiplier = 2; if (static_cast(n) * kWakeAllMultiplier >= static_cast(sleeping)) { epochWaiter_.bumpAndWakeAll(); } else { epochWaiter_.bumpAndWakeN(static_cast(n), static_cast(sleeping)); } #endif } } // namespace dispenso ================================================ FILE: dispenso/thread_pool.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file thread_pool.h * @ingroup group_core * A file providing ThreadPool. This is the heart of dispenso. All other scheduling paradigms, * including TaskSets, Futures, pipelines, and parallel loops, are built on top of ThreadPool. **/ #pragma once #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace dispenso { #if !defined(DISPENSO_WAKEUP_ENABLE) #if defined(_WIN32) || defined(__linux__) || defined(__MACH__) #define DISPENSO_WAKEUP_ENABLE 1 #else #define DISPENSO_WAKEUP_ENABLE 0 #endif // platform #endif // DISPENSO_WAKEUP_ENABLE #if !defined(DISPENSO_POLL_PERIOD_US) #if defined(_WIN32) #define DISPENSO_POLL_PERIOD_US 1000 #else #if !(DISPENSO_WAKEUP_ENABLE) #define DISPENSO_POLL_PERIOD_US 200 #else #define DISPENSO_POLL_PERIOD_US (1 << 15) // Determined empirically good on dual Xeon Linux #endif // DISPENSO_WAKEUP_ENABLE #endif // PLATFORM #endif // DISPENSO_POLL_PERIOD_US constexpr uint32_t kDefaultSleepLenUs = DISPENSO_POLL_PERIOD_US; constexpr bool kDefaultWakeupEnable = DISPENSO_WAKEUP_ENABLE; /** * A simple tag specifier that can be fed to TaskSets and * ThreadPools to denote that the current thread should never immediately execute a functor, but * rather, the functor should always be placed in the ThreadPool's queue. **/ struct ForceQueuingTag {}; /** * The basic executor for dispenso. It provides typical thread pool functionality, plus allows work * stealing by related types (e.g. TaskSet, Future, etc...), which prevents deadlock when waiting * for pool-recursive tasks. */ class alignas(kCacheLineSize) ThreadPool { public: /** * Construct a thread pool. * * @param n The number of threads to spawn at construction. 
* @param poolLoadMultiplier A parameter that specifies how overloaded the pool should be before * allowing the current thread to self-steal work. **/ DISPENSO_DLL_ACCESS ThreadPool(size_t n, size_t poolLoadMultiplier = 32); /** * Enable or disable signaling wake functionality. If enabled, this will try to ensure that * threads are woken up proactively when work has not been available and it becomes available. * This function is blocking and potentially very slow. Repeated use is discouraged. * * @param enable If set true, turns on signaling wake. If false, turns it off. * @param sleepDuration If enable is true, this is the length of time a thread will wait for a * signal before waking up. If enable is false, this is the length of time a thread will sleep * between polling. * * @note It is highly recommended to leave signaling wake enabled on Windows platforms, as * sleeping/polling tends to perform poorly for intermittent workloads. For Mac/Linux platforms, * it is okay to enable signaling wake, particularly if you wish to set a longer expected duration * between work. If signaling wake is disabled, ensure sleepDuration is small (e.g. 200us) for * best performance. Most users will not need to call this function, as defaults are reasonable. * * **/ template void setSignalingWake( bool enable, const std::chrono::duration& sleepDuration = std::chrono::microseconds(kDefaultSleepLenUs)) { setSignalingWake( enable, static_cast( std::chrono::duration_cast(sleepDuration).count())); } /** * Change the number of threads backing the thread pool. This is a blocking and potentially * slow operation, and repeatedly resizing is discouraged. * * @param n The number of threads in use after call completion **/ DISPENSO_DLL_ACCESS void resize(ssize_t n) { std::lock_guard lk(threadsMutex_); resizeLocked(n); } /** * Get the number of threads backing the pool. If called concurrently to resize, the * number returned may be stale. * * @return The current number of threads backing the pool. **/ ssize_t numThreads() const { return numThreads_.load(std::memory_order_relaxed); } /** * Schedule a functor to be executed. If the pool's load factor is high, execution may happen * inline by the calling thread. * * @param f The functor to be executed. f's signature must match void(). Best * performance will come from passing lambdas, other concrete functors, or OnceFunction, but * std::function or similarly type-erased objects will also work. **/ template DISPENSO_REQUIRES(OnceCallableFunc) void schedule(F&& f); /** * Schedule a functor to be executed. The functor will always be queued and executed by pool * threads. * * @param f The functor to be executed. f's signature must match void(). Best * performance will come from passing lambdas, other concrete functors, or OnceFunction, but * std::function or similarly type-erased objects will also work. **/ template DISPENSO_REQUIRES(OnceCallableFunc) void schedule(F&& f, ForceQueuingTag); /** * Schedule multiple functors to be executed in bulk. This is more efficient than calling * schedule() multiple times when you have many tasks to submit, as it reduces atomic contention * and allows for better thread wakeup behavior. * * @param count The number of functors to schedule. * @param gen A generator functor that takes an index and returns a functor to execute. * gen(i) will be called for i in [0, count) to produce each task. 
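   *
   * A minimal usage sketch (the names here are illustrative, not part of the API):
   * @code
   *   // Submit `count` independent tasks in one call; each gen(i) returns a void() functor.
   *   pool.scheduleBulk(count, [&results](size_t i) {
   *     return [&results, i]() { results[i] = computeItem(i); };
   *   });
   * @endcode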
* * @note Work is enqueued in chunks and interleaved with inline execution based on the pool's * load factor, preventing both pool thread starvation and excessive queueing overhead. **/ template void scheduleBulk(size_t count, Generator&& gen); /** * Destruct the pool. This destructor is blocking until all queued work is completed. It is * illegal to call the destructor while any other thread makes calls to the pool (as is generally * the case with C++ classes). **/ DISPENSO_DLL_ACCESS ~ThreadPool(); private: class PerThreadData { public: void setThread(std::thread&& t); bool running(); void stop(); ~PerThreadData(); public: alignas(kCacheLineSize) std::thread thread_; std::atomic running_{true}; }; DISPENSO_DLL_ACCESS uint32_t wait(uint32_t priorEpoch); DISPENSO_DLL_ACCESS void wake(); DISPENSO_DLL_ACCESS void wakeN(ssize_t n); void setSignalingWake(bool enable, uint32_t sleepDurationUs) { std::lock_guard lk(threadsMutex_); ssize_t currentPoolSize = numThreads(); resizeLocked(0); enableEpochWaiter_.store(enable, std::memory_order_release); sleepLengthUs_.store(sleepDurationUs, std::memory_order_release); resizeLocked(currentPoolSize); } DISPENSO_DLL_ACCESS void resizeLocked(ssize_t n); void executeNext(OnceFunction work); DISPENSO_DLL_ACCESS void threadLoop(PerThreadData& threadData); bool tryExecuteNext(); bool tryExecuteNextFromProducerToken(moodycamel::ProducerToken& token); template void schedule(moodycamel::ProducerToken& token, F&& f); template void schedule(moodycamel::ProducerToken& token, F&& f, ForceQueuingTag); // Core bulk enqueue: unconditionally stage, enqueue, and wake for a chunk of tasks. // Caller is responsible for load factor checks. Count should be small (e.g. <= 2*numThreads). // When a producer token is provided, uses token-based enqueue for better throughput. template void scheduleBulkEnqueue(size_t count, Generator&& gen, moodycamel::ProducerToken* token = nullptr); // The non-atomic read of two separate atomics (numSleeping_ and workRemaining_) can // race, potentially causing a missed wake in rare cases. This is benign: the epoch // waiter's sleep timeout (sleepLengthUs_) provides a safety net, so a missed wake only // delays wakeup by up to that duration, never causes a hang. Subsequent schedule() // calls will trigger wakes. void conditionallyWake() { if (enableEpochWaiter_.load(std::memory_order_acquire)) { ssize_t sleeping = numSleeping_.load(std::memory_order_acquire); if (sleeping > 0) { // Only wake if there's more pending work than awake threads can handle. // Don't count the caller — it may be a pool thread that will continue // dequeuing its own work, not processing what it just submitted. ssize_t awake = numThreads_.load(std::memory_order_relaxed) - sleeping; ssize_t pending = workRemaining_.load(std::memory_order_relaxed); if (pending > awake) { wake(); } } } } public: // If we are not yet C++17, we provide aligned new/delete to avoid false sharing. #if __cplusplus < 201703L static void* operator new(size_t sz) { return detail::alignedMalloc(sz); } static void operator delete(void* ptr) { return detail::alignedFree(ptr); } #endif // __cplusplus private: // Number of tasks each thread accumulates before flushing workRemaining_. // This batching reduces atomic contention in threadLoop, but inflates // workRemaining_ by up to kWorkBatchSize * numThreads, so poolLoadFactor_ // must be reduced accordingly for accurate load-shedding in schedule(). 
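  // For example (illustrative arithmetic): with 16 worker threads and the batch size of 8
  // below, workRemaining_ can overstate the truly pending work by up to 8 * 16 = 128 tasks
  // between flushes.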
static constexpr int kWorkBatchSize = 8; mutable std::mutex threadsMutex_; std::deque threads_; size_t poolLoadMultiplier_; // These atomics are read frequently in the hot schedule() path, so they need // cache-line alignment to avoid false sharing with the mutex/deque above. alignas(kCacheLineSize) std::atomic poolLoadFactor_; std::atomic numThreads_; moodycamel::ConcurrentQueue work_; alignas(kCacheLineSize) std::atomic numSleeping_{0}; alignas(kCacheLineSize) std::atomic workRemaining_{0}; alignas(kCacheLineSize) detail::EpochWaiter epochWaiter_; alignas(kCacheLineSize) std::atomic enableEpochWaiter_{kDefaultWakeupEnable}; std::atomic sleepLengthUs_{kDefaultSleepLenUs}; #if defined DISPENSO_DEBUG alignas(kCacheLineSize) std::atomic outstandingTaskSets_{0}; #endif // NDEBUG friend class ConcurrentTaskSet; friend class TaskSet; friend class TaskSetBase; }; /** * Get access to the global thread pool. * * @return the global thread pool **/ DISPENSO_DLL_ACCESS ThreadPool& globalThreadPool(); /** * Change the number of threads backing the global thread pool. * * @param numThreads The number of threads to back the global thread pool. **/ DISPENSO_DLL_ACCESS void resizeGlobalThreadPool(size_t numThreads); // ----------------------------- Implementation details ------------------------------------- template DISPENSO_REQUIRES(OnceCallableFunc) inline void ThreadPool::schedule(F&& f) { ssize_t curWork = workRemaining_.load(std::memory_order_relaxed); ssize_t quickLoadFactor = numThreads_.load(std::memory_order_relaxed); quickLoadFactor += quickLoadFactor / 2; if ((detail::PerPoolPerThreadInfo::isPoolRecursive(this) && curWork > quickLoadFactor) || (curWork > poolLoadFactor_.load(std::memory_order_relaxed))) { f(); } else { schedule(std::forward(f), ForceQueuingTag()); } } template DISPENSO_REQUIRES(OnceCallableFunc) inline void ThreadPool::schedule(F&& f, ForceQueuingTag) { if (auto* token = static_cast(detail::PerPoolPerThreadInfo::producer(this))) { schedule(*token, std::forward(f), ForceQueuingTag()); return; } if (!numThreads_.load(std::memory_order_relaxed)) { f(); return; } workRemaining_.fetch_add(1, std::memory_order_release); DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); bool enqueued = work_.enqueue({std::forward(f)}); DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); (void)(enqueued); // unused assert(enqueued); conditionallyWake(); } template inline void ThreadPool::schedule(moodycamel::ProducerToken& token, F&& f) { ssize_t curWork = workRemaining_.load(std::memory_order_relaxed); ssize_t quickLoadFactor = numThreads_.load(std::memory_order_relaxed); quickLoadFactor += quickLoadFactor / 2; if ((detail::PerPoolPerThreadInfo::isPoolRecursive(this) && curWork > quickLoadFactor) || (curWork > poolLoadFactor_.load(std::memory_order_relaxed))) { f(); } else { schedule(token, std::forward(f), ForceQueuingTag()); } } template inline void ThreadPool::schedule(moodycamel::ProducerToken& token, F&& f, ForceQueuingTag) { if (!numThreads_.load(std::memory_order_relaxed)) { f(); return; } workRemaining_.fetch_add(1, std::memory_order_release); DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); bool enqueued = work_.enqueue(token, {std::forward(f)}); DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); (void)(enqueued); // unused assert(enqueued); conditionallyWake(); } inline bool ThreadPool::tryExecuteNext() { OnceFunction next; DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); bool dequeued = work_.try_dequeue(next); DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); if (dequeued) { executeNext(std::move(next)); return true; } 
return false; } inline bool ThreadPool::tryExecuteNextFromProducerToken(moodycamel::ProducerToken& token) { OnceFunction next; if (work_.try_dequeue_from_producer(token, next)) { executeNext(std::move(next)); return true; } return false; } inline void ThreadPool::executeNext(OnceFunction next) { next(); workRemaining_.fetch_add(-1, std::memory_order_relaxed); } namespace detail { // Generating iterator for scheduleBulkEnqueue. Produces OnceFunction objects // on-the-fly during enqueue_bulk, avoiding the need for a staging buffer. // moodycamel's enqueue_bulk uses single-pass input iterator semantics. template struct BulkGenIter { using difference_type = std::ptrdiff_t; using value_type = OnceFunction; using pointer = OnceFunction*; using reference = OnceFunction&; using iterator_category = std::input_iterator_tag; Generator* gen; size_t index; OnceFunction operator*() { return (*gen)(index); } BulkGenIter& operator++() { ++index; return *this; } BulkGenIter operator++(int) { BulkGenIter tmp = *this; ++index; return tmp; } }; } // namespace detail template void ThreadPool::scheduleBulkEnqueue( size_t count, Generator&& gen, moodycamel::ProducerToken* token) { detail::BulkGenIter::type> it{&gen, 0}; // Single atomic update + bulk enqueue workRemaining_.fetch_add(static_cast(count), std::memory_order_release); DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); bool enqueued; if (token) { enqueued = work_.enqueue_bulk(*token, it, count); } else { enqueued = work_.enqueue_bulk(it, count); } DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); (void)(enqueued); assert(enqueued); // Wake appropriate threads if (enableEpochWaiter_.load(std::memory_order_acquire)) { ssize_t sleeping = numSleeping_.load(std::memory_order_acquire); ssize_t toWake = std::min(sleeping, static_cast(count)); if (toWake > 0) { wakeN(toWake); } } } template void ThreadPool::scheduleBulk(size_t count, Generator&& gen) { if (count == 0) { return; } ssize_t numPool = numThreads_.load(std::memory_order_relaxed); if (!numPool) { // No threads in pool - execute all inline for (size_t i = 0; i < count; ++i) { gen(i)(); } return; } // Process in chunks, interleaving enqueue and inline execution based on load. size_t chunkSize = static_cast(numPool) + static_cast(numPool) / 2; if (chunkSize < 1) { chunkSize = 1; } size_t i = 0; while (i < count) { ssize_t curWork = workRemaining_.load(std::memory_order_relaxed); ssize_t loadFactor = poolLoadFactor_.load(std::memory_order_relaxed); if (curWork > loadFactor) { // Over load factor - execute one task inline, then re-check gen(i)(); ++i; } else { // Under load factor - enqueue a chunk ssize_t room = loadFactor - curWork; size_t toEnqueue = std::min({count - i, chunkSize, static_cast(room)}); if (toEnqueue == 0) { toEnqueue = 1; } size_t base = i; scheduleBulkEnqueue(toEnqueue, [&gen, base](size_t j) { return gen(base + j); }); i += toEnqueue; } } } } // namespace dispenso ================================================ FILE: dispenso/timed_task.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
*/ #include #include #include namespace dispenso { TimedTaskScheduler::TimedTaskScheduler(ThreadPriority prio) : priority_(prio) { thread_ = std::thread([this, prio]() { detail::registerFineSchedulerQuanta(); if (!setCurrentThreadPriority(prio)) { std::cerr << "Couldn't set thread priority" << std::endl; } timeQueueRunLoop(); }); } TimedTaskScheduler::~TimedTaskScheduler() { { std::lock_guard lk(queueMutex_); running_ = false; } epoch_.bumpAndWake(); thread_.join(); } void TimedTaskScheduler::kickOffTask(std::shared_ptr next, double curTime) { size_t remaining = next->timesToRun.fetch_sub(1, std::memory_order_acq_rel); if (remaining == 1) { auto* np = next.get(); np->func(std::move(next)); } else if (remaining > 1) { next->func(next); if (next->steady) { next->nextAbsTime += next->period; } else { next->nextAbsTime = curTime + next->period; } std::lock_guard lk(queueMutex_); tasks_.push(std::move(next)); } } constexpr double kSmallTimeBuffer = 10e-6; void TimedTaskScheduler::timeQueueRunLoop() { #if defined(_WIN32) constexpr double kSpinYieldBuffer = 1e-3; constexpr double kSpinBuffer = 100e-6; #else constexpr double kSpinYieldBuffer = 500e-6; constexpr double kSpinBuffer = 50e-6; #endif // platform constexpr double kConvertToUs = 1e6; uint32_t curEpoch = epoch_.current(); while (true) { { std::unique_lock lk(queueMutex_); if (priority_ != getCurrentThreadPriority()) { setCurrentThreadPriority(priority_); } if (!running_) { break; } if (tasks_.empty()) { lk.unlock(); curEpoch = epoch_.wait(curEpoch); continue; } } double curTime = getTime(); double timeRemaining; std::unique_lock lk(queueMutex_); timeRemaining = tasks_.top()->nextAbsTime - curTime; if (timeRemaining < kSmallTimeBuffer) { auto next = tasks_.top(); tasks_.pop(); lk.unlock(); kickOffTask(std::move(next), curTime); } else if (timeRemaining < kSpinBuffer) { continue; } else if (timeRemaining < kSpinYieldBuffer) { lk.unlock(); std::this_thread::yield(); continue; } else { lk.unlock(); curEpoch = epoch_.waitFor( curEpoch, static_cast((timeRemaining - kSpinBuffer) * kConvertToUs)); } } } void TimedTaskScheduler::addTimedTask(std::shared_ptr task) { double curTime = getTime(); double timeRemaining; timeRemaining = task->nextAbsTime - curTime; if (timeRemaining < kSmallTimeBuffer) { kickOffTask(std::move(task), curTime); } else { std::lock_guard lk(queueMutex_); tasks_.push(std::move(task)); } epoch_.bumpAndWake(); } TimedTaskScheduler& globalTimedTaskScheduler() { static TimedTaskScheduler scheduler; return scheduler; } } // namespace dispenso ================================================ FILE: dispenso/timed_task.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file timed_task.h * @ingroup group_async * Utilities for delaying a task until a future time and periodic scheduling. **/ #pragma once #include #include #include #include #include #include #include namespace dispenso { /** * For periodic tasks, this type will describe the behavior of how tasks are scheduled **/ enum class TimedTaskType { kNormal ///< Schedule the next task at period plus the time the first or most recent task ran , kSteady ///< Schedule the next task at period plus the time the first or most recent task was ///< scheduled for }; /** * A timed task type. 
This encapsulates a function that will run at a time scheduled in the future, * and optionally periodically for a specified number of times. **/ class TimedTask { public: TimedTask(const TimedTask&) = delete; /** Move constructor. */ TimedTask(TimedTask&& other) : impl_(std::move(other.impl_)) {} TimedTask& operator=(const TimedTask&) = delete; TimedTask& operator=(TimedTask&& other) { impl_ = std::move(other.impl_); return *this; } /** * Cancel the task. No further runs to the underlying function will occur (though any calls in * progress will complete). **/ void cancel() { impl_->timesToRun.store(0, std::memory_order_release); impl_->flags.fetch_or(detail::kFFlagsCancelled, std::memory_order_release); } /** * Detach this task so that it cannot block in destruction, and destruction does not cancel * further runs to the underlying function. This is only to be used in cases where the underlying * schedulable and any function resources are expected to outlive any other copy of the task. **/ void detach() { impl_->flags.fetch_or(detail::kFFlagsDetached, std::memory_order_release); } /** * See how many calls there have been to the underlying function have completed thus far. * * @return the count of calls made this far to the underlying function. **/ size_t calls() const { return impl_->count.load(std::memory_order_acquire); } /** * Destroy the timed task. If detach() has not been called, this will cancel further calls to the * underlying scheduled function, and will wait if necessary until the function is no longer in * progress. This is to naturally protect resources that may be used by the function. If * detach() was called, the destructor will not block. * **/ ~TimedTask() { if (!impl_ || impl_->flags.load(std::memory_order_acquire) & detail::kFFlagsDetached) { return; } cancel(); while (impl_->inProgress.load(std::memory_order_acquire)) { } // Now we can safely destroy the underlying function. We do this here because we can't risk // that func may call code in it's destructor that may no longer be relevant after this // TimedTask destructor completes. impl_->func = {}; } private: template TimedTask( Schedulable& sched, F&& f, double nextRunAbs, double period = 0.0, size_t timesToRun = 1, TimedTaskType type = TimedTaskType::kNormal) : impl_( detail::make_shared( timesToRun, nextRunAbs, period, std::forward(f), sched, type == TimedTaskType::kSteady)) {} std::shared_ptr impl_; friend class TimedTaskScheduler; }; // namespace dispenso /** * A timed-task scheduler running on a single thread. This allows multiple schedulers if necessary, * if e.g. you wish to schedule against InlineInvoker backing schedulable and you're running * a lot of relatively quick timed tasks.Most people should just use the global timed task * scheduler. **/ class TimedTaskScheduler { public: /** * Create a TimedTaskScheduler with specified priority * * @param priority The priority to set. For highest level of periodicity accuracy, use kRealtime. * @note Priorities above kNormal should be used sparingly, and only short-running tasks should be * run inline to avoid bad OS responsivity. When using a ThreadPool Schedulable, tasks can be * long running. **/ DISPENSO_DLL_ACCESS explicit TimedTaskScheduler( ThreadPriority priority = ThreadPriority::kNormal); DISPENSO_DLL_ACCESS ~TimedTaskScheduler(); /** * Set the priority for the backing thread. See note for constructor. 
**/ void setPriority(ThreadPriority priority) { std::lock_guard lk(queueMutex_); priority_ = priority; } /** * Schedule a task * * @param sched A backing schedulable, such as ImmediateInvoker, NewThreadInvoker, TaskSet, or * ThreadPool to run the function in when it is scheduled. * @param func A bool() function to run when scheduled. The function may return true to indicate * it should continue to be scheduled, or false to cancel. * @param nextRunAbs The absolute time to run the function. Time scale is expected to match * getTime() for absolute times. * @param period The period in seconds. * @param timesToRun The number of times to run the function. After that number, the function * will not be called again. * @param type The type of periodicity (if any). **/ template TimedTask schedule( Schedulable& sched, F&& func, double nextRunAbs, double period = 0.0, size_t timesToRun = 1, TimedTaskType type = TimedTaskType::kNormal) { TimedTask task(sched, std::forward(func), nextRunAbs, period, timesToRun, type); addTimedTask(task.impl_); return task; } /** * Schedule a task to run once at a time in the future * * @param sched A backing schedulable, such as ImmediateInvoker, NewThreadInvoker, TaskSet, or * ThreadPool to run the function in when it is scheduled. * @param func A bool() function to run when scheduled. The function may return true to indicate * it should continue to be scheduled, or false to cancel. * @param timeInFuture the amount of time from current at which to schedule the function **/ template TimedTask schedule(Schedulable& sched, F&& func, const std::chrono::duration& timeInFuture) { return schedule(sched, std::forward(func), toNextRun(timeInFuture)); } /** * Schedule a task to run once at a time in the future * * @param sched A backing schedulable, such as ImmediateInvoker, NewThreadInvoker, TaskSet, or * ThreadPool to run the function in when it is scheduled. * @param func A bool() function to run when scheduled. The function may return true to indicate * it should continue to be scheduled, or false to cancel. * @param nextRunTime An absolute time to run the function. If in the past, func will run * immediately. **/ template TimedTask schedule( Schedulable& sched, F&& func, const std::chrono::time_point& nextRunTime) { return schedule(sched, std::forward(func), toNextRun(nextRunTime)); } /** * Schedule a task to run periodically * * @param sched A backing schedulable, such as ImmediateInvoker, NewThreadInvoker, TaskSet, or * ThreadPool to run the function in when it is scheduled. * @param func A bool() function to run when scheduled. The function may return true to indicate * it should continue to be scheduled, or false to cancel. * @param timeInFuture the amount of time from current at which to schedule the function * @param period The period defining the run frequency * @param timesToRun The number of times to run the function. After that number, the function * will not be called again. * @param type The type of periodicity (if any). **/ template TimedTask schedule( Schedulable& sched, F&& func, const std::chrono::duration& timeInFuture, const std::chrono::duration& period, size_t timesToRun = std::numeric_limits::max(), TimedTaskType type = TimedTaskType::kNormal) { return schedule( sched, std::forward(func), toNextRun(timeInFuture), toPeriod(period), timesToRun, type); } /** * Schedule a task to run periodically * * @param sched A backing schedulable, such as ImmediateInvoker, NewThreadInvoker, TaskSet, or * ThreadPool to run the function in when it is scheduled. 
* @param func A bool() function to run when scheduled. The function may return true to indicate * it should continue to be scheduled, or false to cancel. * @param nextRunTime An absolute time to run the function. If in the past, func will run * immediately. * @param period The period defining the run frequency * @param timesToRun The number of times to run the function. After that number, the function * will not be called again. * @param type The type of periodicity (if any). **/ template < typename Schedulable, typename Rep, typename Period, typename Clock, typename Duration, typename F> TimedTask schedule( Schedulable& sched, F&& func, const std::chrono::time_point& nextRunTime, const std::chrono::duration& period, size_t timesToRun = std::numeric_limits::max(), TimedTaskType type = TimedTaskType::kNormal) { return schedule( sched, std::forward(func), toNextRun(nextRunTime), toPeriod(period), timesToRun, type); } private: template static double toNextRun(const std::chrono::duration& timeInFuture) { return getTime() + std::chrono::duration(timeInFuture).count(); } template static double toNextRun(const std::chrono::time_point& nextRunTime) { auto curTime = Clock::now(); return toNextRun(nextRunTime - curTime); } template static double toPeriod(const std::chrono::duration& period) { return std::chrono::duration(period).count(); } DISPENSO_DLL_ACCESS void addTimedTask(std::shared_ptr task); void timeQueueRunLoop(); void kickOffTask(std::shared_ptr next, double curTime); struct Compare { bool operator()( const std::shared_ptr& a, const std::shared_ptr& b) const { return a->nextAbsTime > b->nextAbsTime; } }; // TODO(bbudge): Consider lock-free priority queue implementation. I'd expect it to be minimally // beneficial for this use case though... timed tasks should rarely be super-high contention. std::mutex queueMutex_; std::priority_queue< std::shared_ptr, std::vector>, Compare> tasks_; bool running_{true}; detail::EpochWaiter epoch_; std::thread thread_; ThreadPriority priority_; }; /** * Access the global timed task scheduler. Most applications should only require one scheduler. * * @return A reference to the single instance global timed task scheduler. **/ DISPENSO_DLL_ACCESS TimedTaskScheduler& globalTimedTaskScheduler(); } // namespace dispenso ================================================ FILE: dispenso/timing.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
*/ #include #include #include #if defined(_MSC_VER) #include #endif // _MSC_VER #if defined(_WIN32) #include #endif // _WIN32 #if defined(__MACH__) #include #include #endif // __MACH__ namespace dispenso { namespace { #if defined(__x86_64__) || defined(_M_AMD64) #define DISPENSO_HAS_TIMESTAMP #if defined(_MSC_VER) inline uint64_t rdtscp() { uint32_t ui; return __rdtscp(&ui); } #else inline uint64_t rdtscp() { uint32_t lo, hi; __asm__ volatile("rdtscp" : /* outputs */ "=a"(lo), "=d"(hi) : /* no inputs */ : /* clobbers */ "%rcx"); return (uint64_t)lo | (((uint64_t)hi) << 32); } #endif // OS #elif (defined(__GNUC__) || defined(__clang__)) && defined(__aarch64__) #define DISPENSO_HAS_TIMESTAMP uint64_t rdtscp(void) { uint64_t val; __asm__ volatile("mrs %0, cntvct_el0" : "=r"(val)); return val; } #endif // ARCH } // namespace #if defined(DISPENSO_HAS_TIMESTAMP) #if !defined(__aarch64__) static bool snapFreq(double& firstApprox) { switch (static_cast(firstApprox)) { case 0: if (std::abs(int(firstApprox * 10.0)) <= 1) { firstApprox = 0.0; return true; } break; case 9: if (std::abs(int(firstApprox * 10.0) - 99) <= 1) { firstApprox = 10.0; return true; } break; case 3: if (std::abs(int(firstApprox * 10.0) - 33) <= 1) { firstApprox = 3.0 + 1.0 / 3.0; return true; } break; case 6: if (std::abs(int(firstApprox * 10.0) - 66) <= 1) { firstApprox = 6.0 + 2.0 / 3.0; return true; } break; } return false; } static double fallbackTicksPerSecond() { using namespace std::chrono_literals; constexpr double kChronoOverheadBias = 250e-9; auto baseStart = std::chrono::high_resolution_clock::now(); auto start = rdtscp(); std::this_thread::sleep_for(50ms); auto end = rdtscp(); auto baseEnd = std::chrono::high_resolution_clock::now(); auto base = std::chrono::duration(baseEnd - baseStart).count() - kChronoOverheadBias; double firstApprox = (static_cast(end - start)) / base; // Try to refine the approximation. In some circumstances we can "snap" the frequency to a very // good guess that is off by less than one part in thousands. Accuracy should already be quite // good in any case, but this allows us to improve in some cases. // Get first 3 digits firstApprox *= 1e-7; int firstInt = static_cast(firstApprox); firstApprox -= firstInt; firstApprox *= 10.0; if (!snapFreq(firstApprox)) { int secondInt = static_cast(firstApprox); firstApprox -= secondInt; firstApprox *= 10.0; snapFreq(firstApprox); firstApprox *= 0.1; firstApprox += secondInt; } firstApprox *= 0.1; firstApprox += firstInt; firstApprox *= 1e7; return firstApprox; } #endif // !__aarch64__ #if defined(__aarch64__) static double ticksPerSecond() { uint64_t val; __asm__ volatile("mrs %0, cntfrq_el0" : "=r"(val)); return static_cast(val); } #elif defined(__MACH__) static double ticksPerSecond() { mach_timebase_info_data_t info; if (mach_timebase_info(&info) != KERN_SUCCESS) { return fallbackTicksPerSecond(); } return 1e9 * static_cast(info.denom) / static_cast(info.numer); } #else double ticksPerSecond() { return fallbackTicksPerSecond(); } #endif double getTime() { static double secondsPerTick = 1.0 / ticksPerSecond(); static double startTime = static_cast(rdtscp()) * secondsPerTick; double t = static_cast(rdtscp()) * secondsPerTick; return t - startTime; } #else double getTime() { static auto startTime = std::chrono::high_resolution_clock::now(); auto cur = std::chrono::high_resolution_clock::now(); return std::chrono::duration(cur - startTime).count(); } #endif // DISPENSO_HAS_TIMESTAMP namespace { // This should ensure that we initialize the time before main. 
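// Descriptive note: forcing getTime() to run during static initialization makes its
// function-local statics (the ticks-per-second calibration and the start-time offset) pay
// their one-time cost at startup (including the roughly 50 ms fallback calibration sleep
// used on some x86 configurations), and pins the returned time origin near program start.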
double g_dummyTime = getTime(); } // namespace } // namespace dispenso ================================================ FILE: dispenso/timing.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file timing.h * @ingroup group_util * Utilities for getting the current time. **/ #pragma once #include namespace dispenso { /** * Get elapsed time in seconds since program start. * * Uses high-resolution timing when available (e.g., RDTSC on x86). * * @return Elapsed time in seconds as a double. */ DISPENSO_DLL_ACCESS double getTime(); } // namespace dispenso ================================================ FILE: dispenso/tsan_annotations.cpp ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include #if DISPENSO_HAS_TSAN #ifdef __GNUC__ #define ATTRIBUTE_WEAK __attribute__((weak)) #else #define ATTRIBUTE_WEAK #endif // These are found in the accompanying libtsan, but there is no header exposing them. We want to // also avoid exposing them in a header to to discourage folks from calling them directly. extern "C" { void AnnotateIgnoreReadsBegin(const char* f, int l) ATTRIBUTE_WEAK; void AnnotateIgnoreReadsEnd(const char* f, int l) ATTRIBUTE_WEAK; void AnnotateIgnoreWritesBegin(const char* f, int l) ATTRIBUTE_WEAK; void AnnotateIgnoreWritesEnd(const char* f, int l) ATTRIBUTE_WEAK; void AnnotateNewMemory(const char* f, int l, const volatile void* address, long size) ATTRIBUTE_WEAK; void AnnotateHappensBefore(const char* f, int l, const volatile void* address) ATTRIBUTE_WEAK; void AnnotateHappensAfter(const char* f, int l, const volatile void* address) ATTRIBUTE_WEAK; } namespace dispenso { namespace detail { void annotateIgnoreWritesBegin(const char* f, int l) { AnnotateIgnoreWritesBegin(f, l); } void annotateIgnoreWritesEnd(const char* f, int l) { AnnotateIgnoreWritesEnd(f, l); } void annotateIgnoreReadsBegin(const char* f, int l) { AnnotateIgnoreReadsBegin(f, l); } void annotateIgnoreReadsEnd(const char* f, int l) { AnnotateIgnoreReadsEnd(f, l); } void annotateNewMemory(const char* f, int l, const volatile void* address, long size) { AnnotateNewMemory(f, l, address, size); } void annotateHappensBefore(const char* f, int l, const volatile void* address) { AnnotateHappensBefore(f, l, address); } void annotateHappensAfter(const char* f, int l, const volatile void* address) { AnnotateHappensAfter(f, l, address); } } // namespace detail } // namespace dispenso #endif // TSAN ================================================ FILE: dispenso/tsan_annotations.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file tsan_annotations.h * @ingroup group_util * This file exposes a set of macros for ignoring tsan errors. These should generally not * be used just to shut up TSAN, because most of the time, TSAN reports real bugs. They should be * used only when there is a high level of certainty that TSAN is spitting out a false positive, as * can occasionally happen with lock-free algorithms. 
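 *
 * A minimal illustrative use (the surrounding names here are hypothetical) brackets a write
 * that has been carefully reasoned about as benign:
 * @code
 *   DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
 *   sharedHint = newValue;  // racy by design; correctness does not depend on this value
 *   DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END();
 * @endcode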
* * When these are required, it is best to keep the scope as small as possible to avoid blinding TSAN * to real bugs. Note that several libraries already expose macros like these, but we want to * keep dependencies to a bare minimum. **/ #pragma once #include #if defined(__SANITIZE_THREAD__) #define DISPENSO_HAS_TSAN 1 #elif defined(__has_feature) #if __has_feature(thread_sanitizer) #define DISPENSO_HAS_TSAN 1 #else #define DISPENSO_HAS_TSAN 0 #endif // TSAN #else #define DISPENSO_HAS_TSAN 0 #endif // feature #if DISPENSO_HAS_TSAN namespace dispenso { namespace detail { DISPENSO_DLL_ACCESS void annotateIgnoreWritesBegin(const char* f, int l); DISPENSO_DLL_ACCESS void annotateIgnoreWritesEnd(const char* f, int l); DISPENSO_DLL_ACCESS void annotateIgnoreReadsBegin(const char* f, int l); DISPENSO_DLL_ACCESS void annotateIgnoreReadsEnd(const char* f, int l); DISPENSO_DLL_ACCESS void annotateNewMemory(const char* f, int l, const volatile void* address, long size); DISPENSO_DLL_ACCESS void annotateHappensBefore(const char* f, int l, const volatile void* address); DISPENSO_DLL_ACCESS void annotateHappensAfter(const char* f, int l, const volatile void* address); } // namespace detail } // namespace dispenso #define DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN() \ ::dispenso::detail::annotateIgnoreWritesBegin(__FILE__, __LINE__) #define DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END() \ ::dispenso::detail::annotateIgnoreWritesEnd(__FILE__, __LINE__) #define DISPENSO_TSAN_ANNOTATE_IGNORE_READS_BEGIN() \ ::dispenso::detail::annotateIgnoreReadsBegin(__FILE__, __LINE__) #define DISPENSO_TSAN_ANNOTATE_IGNORE_READS_END() \ ::dispenso::detail::annotateIgnoreReadsEnd(__FILE__, __LINE__) #define DISPENSO_TSAN_ANNOTATE_NEW_MEMORY(address, size) \ ::dispenso::detail::annotateNewMemory(__FILE__, __LINE__, address, size) #define DISPENSO_TSAN_ANNOTATE_HAPPENS_BEFORE(address) \ ::dispenso::detail::annotateHappensBefore(__FILE__, __LINE__, address) #define DISPENSO_TSAN_ANNOTATE_HAPPENS_AFTER(address) \ ::dispenso::detail::annotateHappensAfter(__FILE__, __LINE__, address) #else #define DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN() #define DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END() #define DISPENSO_TSAN_ANNOTATE_IGNORE_READS_BEGIN() #define DISPENSO_TSAN_ANNOTATE_IGNORE_READS_END() #define DISPENSO_TSAN_ANNOTATE_NEW_MEMORY(address, size) #define DISPENSO_TSAN_ANNOTATE_HAPPENS_BEFORE(address) #define DISPENSO_TSAN_ANNOTATE_HAPPENS_AFTER(address) #endif // DISPENSO_HAS_TSAN ================================================ FILE: dispenso/util.h ================================================ /* * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ /** * @file util.h * @ingroup group_util * A collection of utility functions and types for memory alignment, bit manipulation, * and performance optimization. * * @note The constant `kCacheLineSize` used by several utilities here is defined in * ``, which is included by this header. **/ #pragma once #include #include #include namespace dispenso { /** * @brief Allocate memory with a specified alignment. * * This function allocates memory aligned to the specified boundary. The alignment * must be a power of 2 and at least sizeof(uintptr_t). 
* * @param bytes The number of bytes to allocate * @param alignment The alignment requirement in bytes (must be power of 2) * @return Pointer to aligned memory, or nullptr on allocation failure * * @note Memory allocated with alignedMalloc must be freed with alignedFree * @see alignedFree * * Example: * @code * void* ptr = dispenso::alignedMalloc(1024, 64); // 64-byte aligned * // ... use ptr ... * dispenso::alignedFree(ptr); * @endcode */ inline void* alignedMalloc(size_t bytes, size_t alignment) { return detail::alignedMalloc(bytes, alignment); } /** * @brief Allocate memory aligned to cache line size. * * This is a convenience overload that aligns to kCacheLineSize (typically 64 bytes), * which helps avoid false sharing in concurrent data structures. * * @param bytes The number of bytes to allocate * @return Pointer to cache-line aligned memory, or nullptr on allocation failure * * @note Memory allocated with alignedMalloc must be freed with alignedFree * @see alignedFree, kCacheLineSize */ inline void* alignedMalloc(size_t bytes) { return detail::alignedMalloc(bytes); } /** * @brief Free memory allocated by alignedMalloc. * * @param ptr Pointer to memory allocated by alignedMalloc (can be nullptr) * * @see alignedMalloc */ inline void alignedFree(void* ptr) { detail::alignedFree(ptr); } /** * @brief Deleter for smart pointers that use aligned memory allocation. * * This deleter calls the destructor and frees memory allocated with alignedMalloc. * It can be used with std::unique_ptr and std::shared_ptr. * * @tparam T The type being deleted * * Example: * @code * using AlignedPtr = std::unique_ptr<MyType, dispenso::AlignedDeleter<MyType>>; * void* mem = dispenso::alignedMalloc(sizeof(MyType), 64); * AlignedPtr ptr(new (mem) MyType(), dispenso::AlignedDeleter<MyType>()); * @endcode */ template <typename T> using AlignedDeleter = detail::AlignedFreeDeleter<T>; /** * @brief Align a value up to the next cache line boundary. * * Rounds up the input value to the next multiple of kCacheLineSize. Useful for * manual memory layout to avoid false sharing. * * @param val The value to align * @return Value aligned to next cache line boundary * * @see kCacheLineSize * * Example: * @code * size_t offset = dispenso::alignToCacheLine(37); // Returns 64 * @endcode */ inline constexpr uintptr_t alignToCacheLine(uintptr_t val) { return detail::alignToCacheLine(val); } /** * @brief CPU relaxation hint for spin loops. * * Emits a platform-specific instruction (PAUSE on x86, YIELD on ARM) to improve * spin loop performance and reduce power consumption. Use this in busy-wait loops * to be friendlier to hyper-threading and the CPU pipeline. * * @note This is a no-op on platforms without a specific relax instruction * * Example: * @code * while (!flag.load(std::memory_order_acquire)) { * dispenso::cpuRelax(); // Be nice to the CPU * } * @endcode */ inline void cpuRelax() { detail::cpuRelax(); } /** * @brief Round up to the next power of 2. * * Computes the smallest power of 2 that is greater than or equal to the input value. * * @param v Input value * @return Next power of 2 (or v if v is already a power of 2) * * @note Returns 0 if v is 0 * * Example: * @code * static_assert(dispenso::nextPow2(17) == 32); * static_assert(dispenso::nextPow2(64) == 64); * @endcode */ constexpr uint64_t nextPow2(uint64_t v) { return detail::nextPow2(v); } /** * @brief Compute log base 2 of a value (compile-time). * * Computes floor(log2(v)) at compile time. Useful for template metaprogramming * and constexpr contexts.
* * @param v Input value (must be > 0) * @return floor(log2(v)) * * @note Behavior is undefined if v is 0 * * Example: * @code * static_assert(dispenso::log2const(64) == 6); * static_assert(dispenso::log2const(100) == 6); * @endcode */ constexpr inline uint32_t log2const(uint64_t v) { return detail::log2const(v); } /** * @brief Compute log base 2 of a value (runtime). * * Computes floor(log2(v)) using platform-specific intrinsics for optimal performance. * On x86/x64, uses __builtin_clzll or __lzcnt64. Falls back to constexpr version * on other platforms. * * @param v Input value (must be > 0) * @return floor(log2(v)) * * @note Behavior is undefined if v is 0 * * Example: * @code * uint32_t power = dispenso::log2(size); // Fast bit scan * @endcode */ inline uint32_t log2(uint64_t v) { return detail::log2(v); } /** * @brief Buffer with proper alignment for type T. * * Provides uninitialized storage with proper alignment for type T. Useful for * manual object lifetime management or placement new scenarios. * * @tparam T The type to provide storage for * * Example: * @code * dispenso::AlignedBuffer<MyType> buf; * MyType* obj = new (buf.b) MyType(); * obj->~MyType(); * @endcode */ template <typename T> using AlignedBuffer = detail::AlignedBuffer<T>; /** * @brief Cache-line aligned atomic pointer. * * An atomic pointer aligned to cache line boundary to avoid false sharing. * Inherits from std::atomic<T*>. * * @tparam T The pointed-to type * * Example: * @code * dispenso::AlignedAtomic<int> ptr; * ptr.store(new int(42)); * @endcode */ template <typename T> using AlignedAtomic = detail::AlignedAtomic<T>; /** * @brief Optional-like storage with in-place construction (C++14 compatible). * * Provides similar functionality to std::optional but works in C++14 codebases. * Stores the value in-place and tracks whether a value is present. * * @tparam T The type to store * * @note For small types, this has ~2x the size overhead of std::optional (pointer vs bool flag). * For larger types, the overhead is similar. Prefer std::optional in C++17 and later. * * Example: * @code * dispenso::OpResult<int> result; * if (success) { * result.emplace(42); * } * if (result) { * use(result.value()); * } * @endcode */ template <typename T> using OpResult = detail::OpResult<T>; /** * @brief Information for statically chunking a range across threads. * * When dividing work into static chunks, using a simple chunk size plus remainder * can lead to poor load balancing. This struct provides the optimal chunking strategy * where some tasks get ceil(items/chunks) work and others get floor(items/chunks). */ using StaticChunking = detail::StaticChunking; /** * @brief Compute optimal static chunking for load balancing. * * Divides items into chunks such that the work is distributed as evenly as possible. * Returns chunking info where some tasks get ceil(items/chunks) and others get * floor(items/chunks). * * @param items Total number of items to process * @param chunks Number of chunks to divide into (must be > 0) * @return StaticChunking information for distributing the work * * Example: * @code * auto chunking = dispenso::staticChunkSize(100, 8); * // First 4 threads get 13 items each, last 4 get 12 items each * // 4*13 + 4*12 = 52 + 48 = 100 * @endcode */ inline StaticChunking staticChunkSize(ssize_t items, ssize_t chunks) { return detail::staticChunkSize(items, chunks); } } // namespace dispenso
================================================ FILE: dispenso/utils/graph_dot.h ================================================
/* * Copyright (c) Meta Platforms, Inc. and affiliates.
* * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ #include <dispenso/graph.h> #include <fstream> #include <string> #include <unordered_map> namespace detail { inline std::string getName( const void* ptr, const size_t index, const std::unordered_map<uintptr_t, std::string>* nodeNames) { const uintptr_t key = reinterpret_cast<uintptr_t>(ptr); if (nodeNames) { auto it = nodeNames->find(key); if (it != nodeNames->end()) { return it->second; } } return std::to_string(index); } } // namespace detail namespace dispenso { /** * Write a Graphviz DOT rendering of a graph, its subgraphs, and node dependency edges to a file. * If nodeNames is provided, it maps node and subgraph addresses (as uintptr_t) to display labels; * otherwise nodes and subgraphs are labeled by their index. */ template <typename G> void graphsToDot( const char* filename, const G& graph, const std::unordered_map<uintptr_t, std::string>* nodeNames) { using SubgraphType = typename G::SubgraphType; using NodeType = typename G::NodeType; std::ofstream datfile(filename); datfile << R"dot(digraph { rankdir = LR node [shape = rectangle, style = filled, colorscheme=pastel19] graph [style = filled, color = Gray95] subgraph cluster_l { label = "Legend"; style=solid; color=black empty1 [style = invis, shape=point] empty2 [style = invis, shape=point] incomplete [color = 1] completed [color = 2] incomplete -> empty1 [label = "normal"] completed -> empty2 [arrowhead = onormal,label = "bidirectional\lpropagation"] } )dot"; const size_t numSubgraphs = graph.numSubgraphs(); for (size_t i = 0; i < numSubgraphs; ++i) { const SubgraphType& s = graph.subgraph(i); if (i != 0) { datfile << " " << "subgraph cluster_" << i << " { label = \"" << ::detail::getName(&s, i, nodeNames) << "\"\n"; } const size_t numNodes = s.numNodes(); for (size_t j = 0; j < numNodes; ++j) { const NodeType& node = s.node(j); datfile << " " << reinterpret_cast<uintptr_t>(&node) << " [color = " << (node.isCompleted() ? 2 : 1); datfile << " label = \"" << ::detail::getName(&node, j, nodeNames) << "\"]\n"; } if (i != 0) { datfile << " }\n"; } } graph.forEachNode([&](const NodeType& node) { node.forEachDependent([&](const dispenso::Node& d) { datfile << " " << reinterpret_cast<uintptr_t>(&node) << " -> " << reinterpret_cast<uintptr_t>(&d); if (std::is_same<NodeType, BiPropNode>::value) { const auto& node1 = static_cast<const BiPropNode&>(node); const auto& node2 = static_cast<const BiPropNode&>(d); datfile << (node1.isSameSet(node2) ? "[arrowhead=onormal]" : ""); } datfile << '\n'; }); }); datfile << "}"; datfile.close(); } } // namespace dispenso
================================================ FILE: docs/Doxyfile ================================================
# Doxyfile 1.8.14 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. # # All text after a double hash (##) is considered a comment and is placed in # front of the TAG it is preceding. # # All text after a single hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all text # before the first occurrence of this tag. Doxygen uses libiconv (or the iconv # built into libc) for the transcoding. See # https://www.gnu.org/software/libiconv/ for the list of possible encodings. # The default value is: UTF-8.
DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded by # double-quotes, unless you are using Doxywizard) that should identify the # project for which the documentation is generated. This name is used in the # title of most generated pages and in a few other places. # The default value is: My Project. PROJECT_NAME = dispenso # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. PROJECT_NUMBER = 1.5.1 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = "A library for task parallelism" # With the PROJECT_LOGO tag one can specify a logo or an icon that is included # in the documentation. The maximum height of the logo should not exceed 55 # pixels and the maximum width should not exceed 200 pixels. Doxygen will copy # the logo to the output directory. PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. OUTPUT_DIRECTORY = ./doxygen # If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and # will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise causes # performance problems for the file system. # The default value is: NO. CREATE_SUBDIRS = NO # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode # U+3044. # The default value is: NO. ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, # Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), # Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, # Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), # Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, # Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, # Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, # Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. # The default value is: YES. 
BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief # description of a member or function before the detailed description # # Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. # The default value is: YES. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator that is # used to form the text in various listings. Each string in this list, if found # as the leading text of the brief description, will be stripped from the text # and the result, after processing the whole list, is used as the annotated # text. Otherwise, the brief description is used as-is. If left blank, the # following values are used ($name is automatically replaced with the name of # the entity):The $name class, The $name widget, The $name file, is, provides, # specifies, contains, represents, a, an and the. ABBREVIATE_BRIEF = "The $name class" \ "The $name widget" \ "The $name file" \ is \ provides \ specifies \ contains \ represents \ a \ an \ the # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # doxygen will generate a detailed section even if there is only a brief # description. # The default value is: NO. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. # The default value is: NO. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path # before files name in the file list and in the header files. If set to NO the # shortest path that makes the file name unique will be used # The default value is: YES. FULL_PATH_NAMES = YES # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. # Stripping is only done if one of the specified strings matches the left-hand # part of the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the path to # strip. # # Note that you can specify absolute paths here, but also relative paths, which # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. STRIP_FROM_PATH = # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which # header file to include in order to use a class. If left blank only the name of # the header file containing the class definition is used. Otherwise one should # specify the list of include paths that are normally passed to the compiler # using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful is your file systems doesn't # support long names like on DOS, Mac, or CD-ROM. # The default value is: NO. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the # first line (until the first dot) of a Javadoc-style comment as the brief # description. If set to NO, the Javadoc-style will behave just like regular Qt- # style comments (thus requiring an explicit @brief command for a brief # description.) # The default value is: NO. 
JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. If # set to NO, the Qt-style will behave just like regular Qt-style comments (thus # requiring an explicit \brief command for a brief description.) # The default value is: NO. QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a # multi-line C++ special comment block (i.e. a block of //! or /// comments) as # a brief description. This used to be the default behavior. The new default is # to treat a multi-line C++ comment block as a detailed description. Set this # tag to YES if you prefer the old behavior instead. # # Note that setting this tag to YES also means that rational rose comments are # not recognized any more. # The default value is: NO. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new # page for each member. If set to NO, the documentation of a member will be part # of the file/class/namespace that contains it. # The default value is: NO. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen # uses this value to replace tabs by spaces in code fragments. # Minimum value: 1, maximum value: 16, default value: 4. TAB_SIZE = 4 # This tag can be used to specify a number of aliases that act as commands in # the documentation. An alias has the form: # name=value # For example adding # "sideeffect=@par Side Effects:\n" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines (in the resulting output). You can put ^^ in the value part of an # alias to insert a newline as if a physical newline was in the original file. ALIASES = # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding "class=itcl::class" # will allow you to use the command class in the itcl::class meaning. TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all # members will be omitted, etc. # The default value is: NO. OPTIMIZE_OUTPUT_FOR_C = NO # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or # Python sources only. Doxygen will then generate output that is more tailored # for that language. For instance, namespaces will be presented as packages, # qualified scopes will look different, etc. # The default value is: NO. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources. Doxygen will then generate output that is tailored for Fortran. # The default value is: NO. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for VHDL. # The default value is: NO. OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. 
With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, Javascript, # C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: # FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: # Fortran. In the later case the parser tries to guess whether the code is fixed # or free formatted code, this is the default for Fortran type files), VHDL. For # instance to make doxygen treat .inc files as Fortran files (default is PHP), # and .f files as C (default is Fortran), use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise # the files are not read by doxygen. EXTENSION_MAPPING = # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable # documentation. See http://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibilities issues. # The default value is: YES. MARKDOWN_SUPPORT = YES # When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up # to that level are automatically included in the table of contents, even if # they do not have an id attribute. # Note: This feature currently applies only to Markdown headings. # Minimum value: 0, maximum value: 99, default value: 0. # This tag requires that the tag MARKDOWN_SUPPORT is set to YES. TOC_INCLUDE_HEADINGS = 0 # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word or # globally by setting AUTOLINK_SUPPORT to NO. # The default value is: YES. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should set this # tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); # versus func(std::string) {}). This also make the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. # The default value is: NO. BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. # The default value is: NO. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: # https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES will make # doxygen to replace the get and set methods by a property in the documentation. # This will only work if the methods are indeed getting or setting a simple # type. 
If this is not the case, or you want to show the methods anyway, you # should set this option to NO. # The default value is: YES. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. # The default value is: NO. DISTRIBUTE_GROUP_DOC = NO # If one adds a struct or class to a group and this option is enabled, then also # any nested class or struct is added to the same group. By default this option # is disabled and one has to add nested compounds explicitly via \ingroup. # The default value is: NO. GROUP_NESTED_COMPOUNDS = NO # Set the SUBGROUPING tag to YES to allow class member groups of the same type # (for instance a group of public functions) to be put as a subgroup of that # type (e.g. under the Public Functions section). Set it to NO to prevent # subgrouping. Alternatively, this can be done per class using the # \nosubgrouping command. # The default value is: YES. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions # are shown inside the group in which they are included (e.g. using \ingroup) # instead of on a separate page (for HTML and Man pages) or section (for LaTeX # and RTF). # # Note that this feature does not work in combination with # SEPARATE_MEMBER_PAGES. # The default value is: NO. INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions # with only public data fields or simple typedef fields will be shown inline in # the documentation of the scope in which they are defined (i.e. file, # namespace, or group documentation), provided this scope is documented. If set # to NO, structs, classes, and unions are shown on a separate page (for HTML and # Man pages) or section (for LaTeX and RTF). # The default value is: NO. INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or # enum is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically be # useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. # The default value is: NO. TYPEDEF_HIDES_STRUCT = NO # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This # cache is used to resolve symbols given their name and scope. Since this can be # an expensive process and often the same symbol appears multiple times in the # code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small # doxygen will become slower. If the cache is too large, memory is wasted. The # cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range # is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 # symbols. At the end of a run doxygen will report the cache usage and suggest # the optimal cache size from a speed point of view. # Minimum value: 0, maximum value: 9, default value: 0. 
LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in # documentation are documented, even if no documentation was available. Private # class members and static file members will be hidden unless the # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. # Note: This will also disable the warnings about undocumented members that are # normally produced when WARNINGS is set to YES. # The default value is: NO. EXTRACT_ALL = NO # If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will # be included in the documentation. # The default value is: NO. EXTRACT_PRIVATE = NO # If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal # scope will be included in the documentation. # The default value is: NO. EXTRACT_PACKAGE = NO # If the EXTRACT_STATIC tag is set to YES, all static members of a file will be # included in the documentation. # The default value is: NO. EXTRACT_STATIC = NO # If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined # locally in source files will be included in the documentation. If set to NO, # only classes defined in header files are included. Does not have any effect # for Java sources. # The default value is: YES. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. If set to YES, local methods, # which are defined in the implementation section but not in the interface are # included in the documentation. If set to NO, only methods in the interface are # included. # The default value is: NO. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base name of # the file that contains the anonymous namespace. By default anonymous namespace # are hidden. # The default value is: NO. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation # section is generated. This option has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO, these classes will be included in the various overviews. This option # has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend # (class|struct|union) declarations. If set to NO, these declarations will be # included in the documentation. # The default value is: NO. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any # documentation blocks found inside the body of a function. If set to NO, these # blocks will be appended to the function's detailed documentation block. # The default value is: NO. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation that is typed after a # \internal command is included. If the tag is set to NO then the documentation # will be excluded. 
Set it to YES to include the internal documentation. # The default value is: NO. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file # names in lower-case letters. If set to YES, upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. # The default value is: system dependent. CASE_SENSE_NAMES = NO # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with # their full class and namespace scopes in the documentation. If set to YES, the # scope will be hidden. # The default value is: NO. HIDE_SCOPE_NAMES = NO # If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will # append additional text to a page's title, such as Class Reference. If set to # YES the compound reference will be hidden. # The default value is: NO. HIDE_COMPOUND_REFERENCE= NO # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. SHOW_INCLUDE_FILES = YES # If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each # grouped member an include statement to the documentation, telling the reader # which file to include in order to use the member. # The default value is: NO. SHOW_GROUPED_MEMB_INC = NO # If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include # files with double quotes in the documentation rather than with sharp brackets. # The default value is: NO. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the # documentation for inline members. # The default value is: YES. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the # (detailed) documentation of file and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. # The default value is: YES. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief # descriptions of file, namespace and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. Note that # this will also influence the order of the classes in the class list. # The default value is: NO. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the # (brief and detailed) documentation of class members so that constructors and # destructors are listed first. If set to NO the constructors will appear in the # respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. # Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief # member documentation. # Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting # detailed member documentation. # The default value is: NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy # of group names into alphabetical order. If set to NO the group names will # appear in their defined order. # The default value is: NO. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by # fully-qualified names, including namespaces. If set to NO, the class list will # be sorted only by class name, not including the namespace part. 
# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the alphabetical # list. # The default value is: NO. SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper # type resolution of all parameters of a function it will reject a match between # the prototype and the implementation of a member function even if there is # only one candidate or it is obvious which candidate to choose by doing a # simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still # accept a match between prototype and implementation in such cases. # The default value is: NO. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo # list. This list is created by putting \todo commands in the documentation. # The default value is: YES. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test # list. This list is created by putting \test commands in the documentation. # The default value is: YES. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug # list. This list is created by putting \bug commands in the documentation. # The default value is: YES. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) # the deprecated list. This list is created by putting \deprecated commands in # the documentation. # The default value is: YES. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional documentation # sections, marked by \if ... \endif and \cond # ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the # documentation. If the initializer consists of more lines than specified here # it will be hidden. Use a value of 0 to hide initializers completely. The # appearance of the value of individual variables and macros / defines can be # controlled using \showinitializer or \hideinitializer command in the # documentation regardless of this setting. # Minimum value: 0, maximum value: 10000, default value: 30. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at # the bottom of the documentation of classes and structs. If set to YES, the # list will mention the files that were used to generate the documentation. # The default value is: YES. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. This # will remove the Files entry from the Quick Index and from the Folder Tree View # (if specified). # The default value is: YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces # page. This will remove the Namespaces entry from the Quick Index and from the # Folder Tree View (if specified). # The default value is: YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen. 
Whatever the program writes to standard output is used as the file # version. For an example see the documentation. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml # will be used as the name of the layout file. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE # tag is left empty. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib # extension is automatically appended if omitted. This requires the bibtex tool # to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info. # For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. See also \cite for info how to create references. CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated to # standard output by doxygen. If QUIET is set to YES this implies that the # messages are off. # The default value is: NO. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated to standard error (stderr) by doxygen. If WARNINGS is set to YES # this implies that the warnings are on. # # Tip: Turn warnings on while writing the documentation. # The default value is: YES. WARNINGS = YES # If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag # will automatically be disabled. # The default value is: YES. WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some parameters # in a documented function, or documenting parameters that don't exist or using # markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return # value. If set to NO, doxygen will only warn about wrong or incomplete # parameter documentation, but not about the absence of documentation. # The default value is: NO. WARN_NO_PARAMDOC = NO # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when # a warning is encountered. # The default value is: NO. WARN_AS_ERROR = NO # The WARN_FORMAT tag determines the format of the warning messages that doxygen # can produce. The string should contain the $file, $line, and $text tags, which # will be replaced by the file and line number from which the warning originated # and the warning text. 
Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard # error (stderr). WARN_LOGFILE = doxygen_warnings.log #--------------------------------------------------------------------------- # Configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag is used to specify the files and/or directories that contain # documented source files. You may enter file names like myfile.cpp or # directories like /usr/src/myproject. Separate the files or directories with # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. INPUT = ../dispenso mainpage.md getting_started.md groups.dox # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: https://www.gnu.org/software/libiconv/) for the list of # possible encodings. # The default value is: UTF-8. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # read by doxygen. # # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, # *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, # *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, # *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf and *.qsf. FILE_PATTERNS = *.h \ *.hh \ *.hxx \ *.hpp \ *.h++ \ *.inl \ *.markdown \ *.md \ *.dox # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. # The default value is: NO. RECURSIVE = NO # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. # The default value is: NO. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* EXCLUDE_PATTERNS = dispenso::detail::* # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. 
The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* EXCLUDE_SYMBOLS = dispenso::detail::* \ *detail* \ DISPENSO_* \ TaskSetBase \ ChunkedRange \ ForceQueuingTag \ value_type \ reference \ const_reference \ size_type \ difference_type \ reference_type \ const_reference_type \ pointer \ const_pointer \ iterator \ const_iterator \ reverse_iterator \ const_reverse_iterator \ NodeType \ SubgraphType \ OpResult \ ssize_t \ kDefault* \ kImmediate* \ kNewThread* \ kCompleted \ kStatic \ ParentCascadeCancel \ operator== \ operator!= \ operator< \ operator> \ operator<= \ operator>= \ operator* \ "operator new" \ "operator delete" \ swap \ share \ then \ numIncompletePredecessors_ \ numPredecessors_ \ dependsOnOneNode \ DO_PRAGMA \ dispenso::cv::* \ *cv::* \ alloc \ dealloc # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include # command). EXAMPLE_PATH = ../examples # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and # *.h) to filter out the source-files in the directories. If left blank all # files are included. EXAMPLE_PATTERNS = *.cpp # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude commands # irrespective of the value of the RECURSIVE tag. # The default value is: NO. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the # \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command: # # # # where is the value of the INPUT_FILTER tag, and is the # name of an input file. Doxygen will then use the output that the filter # program writes to standard output. If FILTER_PATTERNS is specified, this tag # will be ignored. # # Note that the filter must not add or remove lines; it is applied before the # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: pattern=filter # (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how # filters are used. If the FILTER_PATTERNS tag is empty or if none of the # patterns match the file name, INPUT_FILTER is applied. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. 
FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will also be used to filter the input files that are used for # producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). # The default value is: NO. FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern. A pattern will override the setting for FILTER_PATTERN (if any) and # it is also possible to disable source filtering for a specific pattern using # *.ext= (so without naming a filter). # This tag requires that the tag FILTER_SOURCE_FILES is set to YES. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = mainpage.md #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will be # generated. Documented entities will be cross-referenced with these sources. # # Note: To get rid of all source code in the generated output, make sure that # also VERBATIM_HEADERS is set to NO. # The default value is: NO. SOURCE_BROWSER = YES # Setting the INLINE_SOURCES tag to YES will include the body of functions, # classes and enums directly into the documentation. # The default value is: NO. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any # special comment blocks from generated source code fragments. Normal C, C++ and # Fortran comments will always remain visible. # The default value is: YES. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # function all documented functions referencing it will be listed. # The default value is: NO. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES then for each documented function # all documented entities called/used by that function will be listed. # The default value is: NO. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES then the hyperlinks from functions in REFERENCES_RELATION and # REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will # link to the documentation. # The default value is: YES. REFERENCES_LINK_SOURCE = YES # If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the # source code will show a tooltip with additional information such as prototype, # brief description and links to the definition and documentation. Since this # will make the HTML file larger and loading of large files a bit slower, you # can opt to disable this feature. # The default value is: YES. # This tag requires that the tag SOURCE_BROWSER is set to YES. SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen built-in # source browser. The htags tool is part of GNU's global source tagging system # (see https://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. 
# # To use it do the following: # - Install the latest version of global # - Enable SOURCE_BROWSER and USE_HTAGS in the config file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # # Doxygen will invoke htags (and that will in turn invoke gtags), so these # tools must be available from the command line (i.e. in the search path). # # The result: instead of the source browser generated by doxygen, the links to # source code will now point to the output of htags. # The default value is: NO. # This tag requires that the tag SOURCE_BROWSER is set to YES. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set the YES then doxygen will generate a # verbatim copy of the header file for each class for which an include is # specified. Set to NO to disable this. # See also: Section \class. # The default value is: YES. VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all # compounds will be generated. Enable this if the project contains a lot of # classes, structs, unions or interfaces. # The default value is: YES. ALPHABETICAL_INDEX = YES # The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in # which the alphabetical index list will be split. # Minimum value: 1, maximum value: 20, default value: 5. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored # while generating the index headers. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output # The default value is: YES. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of # it. # The default directory is: html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for each # generated HTML page (for example: .htm, .php, .asp). # The default value is: .html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a user-defined HTML header file for # each generated HTML page. If the tag is left blank doxygen will generate a # standard header. # # To get valid HTML the header file that includes any scripts and style sheets # that doxygen needs, which is dependent on the configuration options used (e.g. # the setting GENERATE_TREEVIEW). It is highly recommended to start with a # default header using # doxygen -w html new_header.html new_footer.html new_stylesheet.css # YourConfigFile # and then modify the file new_header.html. See also section "Doxygen usage" # for information on how to generate the default header that doxygen normally # uses. 
# Note: The header is subject to change so you typically have to regenerate the # default header when upgrading to a newer version of doxygen. For a description # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_HEADER = header.html # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. If the tag is left blank doxygen will generate a standard # footer. See HTML_HEADER for more information on how to generate a default # footer and what special commands can be used inside the footer. See also # section "Doxygen usage" for information on how to generate the default footer # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. It can be used to fine-tune the look of # the HTML output. If left blank doxygen will generate a default style sheet. # See also section "Doxygen usage" for information on how to generate the style # sheet that doxygen normally uses. # Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as # it is more robust and this tag (HTML_STYLESHEET) will in the future become # obsolete. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined # cascading style sheets that are included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the # standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet files to the output directory. # Note: The order of the extra style sheet files is of importance (e.g. the last # style sheet in the list overrules the setting of the previous ones in the # list). For an example see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = third-party/doxygen-awesome/doxygen-awesome.css custom.css # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. Use the # $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that the # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_FILES = third-party/doxygen-awesome/doxygen-awesome-darkmode-toggle.js # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to # this color. Hue is specified as an angle on a colorwheel, see # https://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. # Minimum value: 0, maximum value: 359, default value: 220. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_HUE = 124 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors # in the HTML output. For a value of 0 the output will use grayscales only. 
# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for
# each generated HTML page. If the tag is left blank doxygen will generate a
# standard footer. See HTML_HEADER for more information on how to generate a
# default footer and what special commands can be used inside the footer. See
# also section "Doxygen usage" for information on how to generate the default
# footer that doxygen normally uses.
# This tag requires that the tag GENERATE_HTML is set to YES.

HTML_FOOTER            =

# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style
# sheet that is used by each HTML page. It can be used to fine-tune the look of
# the HTML output. If left blank doxygen will generate a default style sheet.
# See also section "Doxygen usage" for information on how to generate the style
# sheet that doxygen normally uses.
# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as
# it is more robust and this tag (HTML_STYLESHEET) will in the future become
# obsolete.
# This tag requires that the tag GENERATE_HTML is set to YES.

HTML_STYLESHEET        =

# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined
# cascading style sheets that are included after the standard style sheets
# created by doxygen. Using this option one can overrule certain style aspects.
# This is preferred over using HTML_STYLESHEET since it does not replace the
# standard style sheet and is therefore more robust against future updates.
# Doxygen will copy the style sheet files to the output directory.
# Note: The order of the extra style sheet files is of importance (e.g. the
# last style sheet in the list overrules the setting of the previous ones in
# the list). For an example see the documentation.
# This tag requires that the tag GENERATE_HTML is set to YES.

HTML_EXTRA_STYLESHEET  = third-party/doxygen-awesome/doxygen-awesome.css custom.css

# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
# other source files which should be copied to the HTML output directory. Note
# that these files will be copied to the base HTML output directory. Use the
# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
# files. In the HTML_STYLESHEET file, use the file name only. Also note that
# the files will be copied as-is; there are no commands or markers available.
# This tag requires that the tag GENERATE_HTML is set to YES.

HTML_EXTRA_FILES       = third-party/doxygen-awesome/doxygen-awesome-darkmode-toggle.js
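# Sketch only, assuming the custom header.html referenced above is where the
# extra file gets wired in: a script listed in HTML_EXTRA_FILES would typically
# be loaded from the header via the $relpath^ marker mentioned above, e.g.
#
#   <script type="text/javascript"
#           src="$relpath^doxygen-awesome-darkmode-toggle.js"></script>
#
# How the script is then initialized is determined by the theme in use
# (doxygen-awesome here), not by this configuration file.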
# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
# will adjust the colors in the style sheet and background images according to
# this color. Hue is specified as an angle on a colorwheel, see
# https://en.wikipedia.org/wiki/Hue for more information. For instance the
# value 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue,
# 300 purple, and 360 is red again.
# Minimum value: 0, maximum value: 359, default value: 220.
# This tag requires that the tag GENERATE_HTML is set to YES.

HTML_COLORSTYLE_HUE    = 124

# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors
# in the HTML output. For a value of 0 the output will use grayscales only. A
# value of 255 will produce the most vivid colors.
# Minimum value: 0, maximum value: 255, default value: 100.
# This tag requires that the tag GENERATE_HTML is set to YES.

HTML_COLORSTYLE_SAT    = 100

# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the
# luminance component of the colors in the HTML output. Values below 100
# gradually make the output lighter, whereas values above 100 make the output
# darker. The value divided by 100 is the actual gamma applied, so 80
# represents a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100
# does not change the gamma.
# Minimum value: 40, maximum value: 240, default value: 80.
# This tag requires that the tag GENERATE_HTML is set to YES.

HTML_COLORSTYLE_GAMMA  = 80

# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated
# HTML page will contain the date and time when the page was generated. Setting
# this to YES can help to show when doxygen was last run and thus if the
# documentation is up to date.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

HTML_TIMESTAMP         = NO

# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML
# documentation will contain a main index with vertical navigation menus that
# are dynamically created via Javascript. If disabled, the navigation index
# will consist of multiple levels of tabs that are statically embedded in every
# HTML page. Disable this option to support browsers that do not have
# Javascript, like the Qt help browser.
# The default value is: YES.
# This tag requires that the tag GENERATE_HTML is set to YES.

HTML_DYNAMIC_MENUS     = YES

# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
# documentation will contain sections that can be hidden and shown after the
# page has loaded.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

HTML_DYNAMIC_SECTIONS  = NO

# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries
# shown in the various tree structured indices initially; the user can expand
# and collapse entries dynamically later on. Doxygen will expand the tree to
# such a level that at most the specified number of entries are visible (unless
# a fully collapsed tree already exceeds this amount). So setting the number of
# entries to 1 will produce a fully collapsed tree by default. 0 is a special
# value representing an infinite number of entries and will result in a fully
# expanded tree by default.
# Minimum value: 0, maximum value: 9999, default value: 100.
# This tag requires that the tag GENERATE_HTML is set to YES.

HTML_INDEX_NUM_ENTRIES = 100

# If the GENERATE_DOCSET tag is set to YES, additional index files will be
# generated that can be used as input for Apple's Xcode 3 integrated
# development environment (see: https://developer.apple.com/tools/xcode/),
# introduced with OSX 10.5 (Leopard). To create a documentation set, doxygen
# will generate a Makefile in the HTML output directory. Running make will
# produce the docset in that directory and running make install will install
# the docset in ~/Library/Developer/Shared/Documentation/DocSets so that Xcode
# will find it at startup. See
# https://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more
# information.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

GENERATE_DOCSET        = NO
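# Sketch of the workflow described above (commands paraphrased from the
# description, illustrative only; this project leaves GENERATE_DOCSET
# disabled). With GENERATE_DOCSET = YES, the docset is built from the Makefile
# that doxygen writes into the HTML output directory:
#
#   cd html          # the HTML_OUTPUT directory configured above
#   make             # produce the .docset bundle in this directory
#   make install     # copy it under ~/Library/Developer/Shared/Documentation/DocSets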
# This tag determines the name of the docset feed. A documentation feed
# provides an umbrella under which multiple documentation sets from a single
# provider (such as a company or product suite) can be grouped.
# The default value is: Doxygen generated docs.
# This tag requires that the tag GENERATE_DOCSET is set to YES.

DOCSET_FEEDNAME        = "Doxygen generated docs"

# This tag specifies a string that should uniquely identify the documentation
# set bundle. This should be a reverse domain-name style string, e.g.
# com.mycompany.MyDocSet. Doxygen will append .docset to the name.
# The default value is: org.doxygen.Project.
# This tag requires that the tag GENERATE_DOCSET is set to YES.

DOCSET_BUNDLE_ID       = org.doxygen.Project

# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
# the documentation publisher. This should be a reverse domain-name style
# string, e.g. com.mycompany.MyDocSet.documentation.
# The default value is: org.doxygen.Publisher.
# This tag requires that the tag GENERATE_DOCSET is set to YES.

DOCSET_PUBLISHER_ID    = org.doxygen.Publisher

# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
# The default value is: Publisher.
# This tag requires that the tag GENERATE_DOCSET is set to YES.

DOCSET_PUBLISHER_NAME  = Publisher

# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
# index.hhp is a project file that can be read by Microsoft's HTML Help
# Workshop (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138)
# on Windows.
#
# The HTML Help Workshop contains a compiler that can convert all HTML output
# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
# files are now used as the Windows 98 help format, and will replace the old
# Windows help format (.hlp) on all Windows platforms in the future. Compressed
# HTML files also contain an index, a table of contents, and you can search for
# words in the documentation. The HTML workshop also contains a viewer for
# compressed HTML files.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

GENERATE_HTMLHELP      = NO

# The CHM_FILE tag can be used to specify the file name of the resulting .chm
# file. You can add a path in front of the file if the result should not be
# written to the html output directory.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

CHM_FILE               =

# The HHC_LOCATION tag can be used to specify the location (absolute path
# including file name) of the HTML help compiler (hhc.exe). If non-empty,
# doxygen will try to run the HTML help compiler on the generated index.hhp.
# The file has to be specified with full path.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

HHC_LOCATION           =

# The GENERATE_CHI flag controls whether a separate .chi index file is
# generated (YES) or whether it should be included in the master .chm file
# (NO).
# The default value is: NO.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

GENERATE_CHI           = NO

# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc)
# and project file content.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

CHM_INDEX_ENCODING     =

# The BINARY_TOC flag controls whether a binary table of contents is generated
# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it
# enables the Previous and Next buttons.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

BINARY_TOC             = NO

# The TOC_EXPAND flag can be set to YES to add extra items for group members to
# the table of contents of the HTML help documentation and to the tree view.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

TOC_EXPAND             = NO

# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that
# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help
# (.qch) of the generated HTML documentation.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

GENERATE_QHP           = NO

# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify
# the file name of the resulting .qch file. The path specified is relative to
# the HTML output folder.
# This tag requires that the tag GENERATE_QHP is set to YES.

QCH_FILE               =

# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
# Project output. For more information please see Qt Help Project / Namespace
# (see: http://doc.qt.io/qt-4.8/qthelpproject.html#namespace).
# The default value is: org.doxygen.Project.
# This tag requires that the tag GENERATE_QHP is set to YES.

QHP_NAMESPACE          = org.doxygen.Project

# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
# Help Project output. For more information please see Qt Help Project /
# Virtual Folders (see:
# http://doc.qt.io/qt-4.8/qthelpproject.html#virtual-folders).
# The default value is: doc.
# This tag requires that the tag GENERATE_QHP is set to YES.

QHP_VIRTUAL_FOLDER     = doc

# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
# filter to add. For more information please see Qt Help Project / Custom
# Filters (see: http://doc.qt.io/qt-4.8/qthelpproject.html#custom-filters).
# This tag requires that the tag GENERATE_QHP is set to YES.

QHP_CUST_FILTER_NAME   =

# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
# custom filter to add. For more information please see Qt Help Project /
# Custom Filters (see:
# http://doc.qt.io/qt-4.8/qthelpproject.html#custom-filters).
# This tag requires that the tag GENERATE_QHP is set to YES.

QHP_CUST_FILTER_ATTRS  =

# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
# project's filter section matches. Qt Help Project / Filter Attributes (see:
# http://doc.qt.io/qt-4.8/qthelpproject.html#filter-attributes).
# This tag requires that the tag GENERATE_QHP is set to YES.

QHP_SECT_FILTER_ATTRS  =

# The QHG_LOCATION tag can be used to specify the location of Qt's
# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
# generated .qhp file.
# This tag requires that the tag GENERATE_QHP is set to YES.

QHG_LOCATION           =

# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be
# generated; together with the HTML files, they form an Eclipse help plugin. To
# install this plugin and make it available under the help contents menu in
# Eclipse, the contents of the directory containing the HTML and XML files
# needs to be copied into the plugins directory of eclipse. The name of the
# directory within the plugins directory should be the same as the
# ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before the
# help appears.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

GENERATE_ECLIPSEHELP   = NO

# A unique identifier for the Eclipse help plugin. When installing the plugin
# the directory name containing the HTML and XML files should also have this
# name. Each documentation set should have its own identifier.
# The default value is: org.doxygen.Project.
# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.

ECLIPSE_DOC_ID         = org.doxygen.Project

# If you want full control over the layout of the generated HTML pages it might
# be necessary to disable the index and replace it with your own. The
# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at
# top of each HTML page. A value of NO enables the index and the value YES
# disables it. Since the tabs in the index contain the same information as the
# navigation tree, you can set this option to YES if you also set
# GENERATE_TREEVIEW to YES.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

DISABLE_INDEX          = NO

# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
# structure should be generated to display hierarchical information. If the tag
# value is set to YES, a side panel will be generated containing a tree-like
# index structure (just like the one that is generated for HTML Help). For this
# to work a browser that supports JavaScript, DHTML, CSS and frames is required
# (i.e. any modern browser). Windows users are probably better off using the
# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one
# can further fine-tune the look of the index. As an example, the default style
# sheet generated by doxygen has an example that shows how to put an image at
# the root of the tree instead of the PROJECT_NAME. Since the tree basically
# has the same information as the tab index, you could consider setting
# DISABLE_INDEX to YES when enabling this option.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

GENERATE_TREEVIEW      = YES
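# For illustration only (not the combination chosen here): if the tab index
# were dropped in favour of the tree view alone, as suggested in the comment
# above, the two related tags would typically be set together, e.g.
#
#   DISABLE_INDEX     = YES
#   GENERATE_TREEVIEW = YES
#
# This configuration keeps the tabs (DISABLE_INDEX = NO) alongside the tree.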
# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values
# that doxygen will group on one line in the generated HTML documentation.
#
# Note that a value of 0 will completely suppress the enum values from
# appearing in the overview section.
# Minimum value: 0, maximum value: 20, default value: 4.
# This tag requires that the tag GENERATE_HTML is set to YES.

ENUM_VALUES_PER_LINE   = 4

# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used
# to set the initial width (in pixels) of the frame in which the tree is shown.
# Minimum value: 0, maximum value: 1500, default value: 250.
# This tag requires that the tag GENERATE_HTML is set to YES.

TREEVIEW_WIDTH         = 250

# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to
# external symbols imported via tag files in a separate window.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

EXT_LINKS_IN_WINDOW    = NO

# Use this tag to change the font size of LaTeX formulas included as images in
# the HTML documentation. When you change the font size after a successful
# doxygen run you need to manually remove any form_*.png images from the HTML
# output directory to force them to be regenerated.
# Minimum value: 8, maximum value: 50, default value: 10.
# This tag requires that the tag GENERATE_HTML is set to YES.

FORMULA_FONTSIZE       = 10

# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
# generated for formulas are transparent PNGs. Transparent PNGs are not
# supported properly for IE 6.0, but are supported on all modern browsers.
#
# Note that when changing this option you need to delete any form_*.png files
# in the HTML output directory before the changes have effect.
# The default value is: YES.
# This tag requires that the tag GENERATE_HTML is set to YES.

FORMULA_TRANSPARENT    = YES

# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
# https://www.mathjax.org) which uses client side Javascript for the rendering
# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX
# installed or if you want the formulas to look prettier in the HTML output.
# When enabled you may also need to install MathJax separately and configure
# the path to it using the MATHJAX_RELPATH option.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

USE_MATHJAX            = NO

# When MathJax is enabled you can set the default output format to be used for
# the MathJax output. See the MathJax site (see:
# http://docs.mathjax.org/en/latest/output.html) for more details.
# Possible values are: HTML-CSS (which is slower, but has the best
# compatibility), NativeMML (i.e. MathML) and SVG.
# The default value is: HTML-CSS.
# This tag requires that the tag USE_MATHJAX is set to YES.

MATHJAX_FORMAT         = HTML-CSS

# When MathJax is enabled you need to specify the location relative to the HTML
# output directory using the MATHJAX_RELPATH option. The destination directory
# should contain the MathJax.js script. For instance, if the mathjax directory
# is located at the same level as the HTML output directory, then
# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
# Content Delivery Network so you can quickly see the result without installing
# MathJax. However, it is strongly recommended to install a local copy of
# MathJax from https://www.mathjax.org before deployment.
# The default value is: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/.
# This tag requires that the tag USE_MATHJAX is set to YES.

MATHJAX_RELPATH        = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/
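# Sketch only (hypothetical paths, and not the setup used here since
# USE_MATHJAX is NO): rendering formulas with a locally installed MathJax copy
# placed next to the HTML output directory, as described above, would look
# roughly like:
#
#   USE_MATHJAX     = YES
#   MATHJAX_RELPATH = ../mathjax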
# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
# extension names that should be enabled during MathJax rendering. For example
# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
# This tag requires that the tag USE_MATHJAX is set to YES.

MATHJAX_EXTENSIONS     =

# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
# of code that will be used on startup of the MathJax code. See the MathJax
# site (see: http://docs.mathjax.org/en/latest/output.html) for more details.
# For an example see the documentation.
# This tag requires that the tag USE_MATHJAX is set to YES.

MATHJAX_CODEFILE       =

# When the SEARCHENGINE tag is enabled doxygen will generate a search box for
# the HTML output. The underlying search engine uses javascript and DHTML and
# should work on any modern browser. Note that when using HTML help
# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)
# there is already a search function so this one should typically be disabled.
# For large projects the javascript based search engine can be slow, then
# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to
# search using the keyboard; to jump to the search box use <access key> + S
# (what the <access key> is depends on the OS and browser, but it is typically
# <CTRL>, <ALT>/